def saveStats(self,
                  path=".",
                  file_name="stats.dat",
                  delim=None,
                  overwrite=True):
        """
        Write the collected statistics of this instance to a text file.

        Parameters
        ----------
        path : str
            Directory in which the output file is created

        file_name : str
            Name of the output file

        delim : str
            Column delimiter; a tab character is used when not given

        overwrite : bool
            Replace the file if it already exists

        Returns
        -------
            None
        """
        # Fall back to a tab separator when no (or an empty) delimiter is given.
        column_delim = delim if delim else "\t"
        StatusResolver.save_query(self.stats, file_name, path,
                                  column_delim, overwrite)
# Esempio n. 2 (Example no. 2 — artifact from the web page this code was scraped from)
# 0
def getStarsFromRemoteDb(query, query_path):
    """
    Parse the query text and return the desired stars
    from a remote database.

    Parameters
    -----------
        query : str
            Query text containing the db_key and query file separated by ':'

        query_path : str
            Directory in which the query file is located

    Returns
    --------
        List of Star objects

    Raises
    ------
        QueryInputError
            If `query` is not of the form 'db_key:query_file'

    Example
    -------
        _getStarsFromRemoteDb("OgleII:query_file.txt") --> [Star objects]

        query_file.txt:
            #starid;field;target
            1;1;lmc
            10;1;smc
    """

    try:
        db_key, query_file = query.split(":")
    except ValueError:
        # Original code built this exception without raising it, then crashed
        # later with a NameError on db_key/query_file — raise it properly.
        raise QueryInputError(
            "Key for resolving stars source was not recognized:\n%s" % query)

    queries = StatusResolver(os.path.join(query_path, query_file)).getQueries()

    stars = []
    # Note: loop variable renamed so it no longer shadows the `query` parameter.
    for star_query in progressbar(queries, "Querying stars: "):
        starsProvider = StarsProvider().getProvider(obtain_method=db_key,
                                                    obtain_params=star_query)

        stars += starsProvider.getStars()

    return stars
def main(project_settings, argv=None):
    """
    Command-line entry point: query an astronomical database and filter the
    resulting light curves.

    Parameters
    ----------
    project_settings : object
        Settings object; QUERIES, FILTERS and RESULTS path attributes are read.

    argv : list, optional
        Command-line arguments; defaults to sys.argv[1:].

    Returns
    -------
        False when called without (or with invalid) arguments, 2 on error,
        None on success.
    """
    program_info = """ABOUT
    The program downloads light curves from astronomical databases
    which pass thru given filters (or all).

    Database to query:
    ------------------
        Database is specified by '-d' and name of connector class.
        
        Note:
            There is a overview of available connectors at the end (if it is
            launched from command line without parameters)
        
    
    Status file:
    ------------
        Queries can be specified in the file where first
        row starts with '#' and then there are keys for query a database.
        Next rows consist of searched values. All columns are separated
        by ';' (can be changed in settings).
        
        Note:
            Example files can be find in data/inputs/examples
        
    Getting filter:
    ---------------
        Filter is loaded from prepared filter object (learned). If it is desired
        to load filter with certain parameters it can be also created by
        tuning tool by giving one combination of parameters.
        
        Note:
            All classes which inherits BaseFilter class located
            in the filters_imp package are considered as filters.
            
                
    Data folder hierarchy:
    -----------------------
        Next to src/ (source) folder there is a data/ folder where all data files
        are saved. All input/outputs are loaded/saved into a folder in data/.
        
        This behaviour can be suppressed by entering word 'HERE:'
        (e.g. 'HERE:path_of_the_file_with_its_name'). It forces to take relative
        path from the directory of executing the script.
        
        There are 5 main folders:
          
            1. data/inputs/
                Location of files of queries and files fro tuning parameters 
            
            2. data/light_curves/
                Location of light curve subfolders. 
            
            3. data/star_filters/
                Location where tuned filters is saved (or can be loaded by
                filter_lcs script)
            
            4. data/tuning_logs/
                Location of output files from tuning - statistic for every combination
                of parameters, graphs (probability distribution with train objects
                and histograms).
            
            5. data/databases/
                Location of local db files (sqlite).
        
    
    Running the program:
    -------------------
        By executing the script all inputs are verified and database is queried.
        Light curves (if any) of stars passed thru filtering are saved into
        'data/light_curves/' + your folder(specified via '-o') and stars are
        saved into local database. So it is possible to load them with their
        values or filter them by other filters.
        Also result file is saved into the folder with light curves in format
        'connector_name'_'filter_name'.txt. 
        
        (TODO)
        It is possible to continue with unfinished query. If query file has
        three more columns generated during the filtering about status of
        particular queries the program will find last finished query and it will
        continues form that point.

    Examples
    --------
        *Just downloading a light curves:
        
            For Ogle query file (named query.txt):
                #starid;field_num;target
                1;1;lmc
                12;1;lmc
            
            ./filter_stars.py -i query.txt -o my_lc_folder -d "OgleII"
        
            The light curves and status file will be saved into "data/light_curves/my_lc_folder" folder.
           
       
            
        *With filtering
        
            It is possible to insert more then one filter by adding combination
            '-f' + filter_name multiple times as is shown in example below.
            
            A command for executing searching light curves in OGLE database
            with filtering:
            
            ./filter_stars.py -i query.txt -o out/ -d "OgleII" -f abbe_filter.conf -f vario_slope.pickel
        """

    program_name = os.path.basename(sys.argv[0])
    program_version = "v0.2"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version,
                                                 program_build_date)
    program_longdesc = "Run script without params to get info about the program and list of available databases"
    program_license = "Copyright 2016 Martin Vo"

    if argv is None:
        argv = sys.argv[1:]
    try:
        # setup option parser
        parser = OptionParser(version=program_version_string,
                              epilog=program_longdesc,
                              description=program_license)
        parser.add_option("-r",
                          "--run",
                          dest="run",
                          help="Name of this run (name of folder for results)",
                          type=str)

        parser.add_option(
            "-q",
            "--query",
            dest="query",
            help="Name of the query file in %PROJECT_DIR/queries")

        parser.add_option("-d",
                          "--database",
                          dest="db",
                          help="Searched database")

        parser.add_option(
            "-s",
            "--coords",
            dest="save_coords",
            default="y",
            help="Save params coordinates of inspected stars if 'y'.")

        parser.add_option(
            "-f",
            "--filter",
            dest="filt",
            action="append",
            default=[],
            help=
            "Name of the filter file in filters folder (%PROJECT_DIR/filters)")

        # process options
        opts, args = parser.parse_args(argv)

        # No arguments at all: print program overview plus the available
        # database connectors and bail out.
        if not len(argv):
            print program_info, "\n"
            print json.dumps(StarsProvider().STARS_PROVIDERS.keys())
            print "Run with '-h' in order to show params help\n"
            return False

        if opts.db not in StarsProvider().STARS_PROVIDERS:
            print "Error: " + "Unresolved database %s \n" % opts.db
            print json.dumps(StarsProvider().STARS_PROVIDERS.keys())
            return False

        # -------    Core    ------

        # ASCII banner framing the program title.
        header = "#" + " " * 40 + \
            "Light Curves Classifier - Filter stars" + " " * 30 + "#"
        print "\n\n\t" + "#" * len(header)
        print "\t#" + " " * (len(header) - 2) + "#"
        print "\t" + header
        print "\t#" + " " * (len(header) - 2) + "#"

        # Number of consecutive unmatched queries tolerated before the
        # searcher gives up (assumed from the name — TODO confirm).
        UNFOUND_LIM = 2

        print "Loading query..."
        try:
            resolver = StatusResolver(status_file_path=os.path.join(
                project_settings.QUERIES, opts.query))
            queries = resolver.getQueries()
        except IOError:
            raise IOError("Query file was not found")
        except Exception as e:
            print "Err:", e
            raise QueryInputError("There is an issue in query file")

        print "Loading filters"
        # Deserialize each filter named via repeated '-f' options.
        star_filters = [
            FiltersSerializer(filt_name,
                              project_settings.FILTERS).loadFilter()
            for filt_name in opts.filt
        ]

        # Human-readable filter names for the summary printout.
        if not star_filters:
            filt_txt = ""
        else:
            filt_txt = [filt.__class__.__name__ for filt in star_filters]

        # '-s y' (the default) enables saving of coordinates.
        if opts.save_coords == "y":
            save_coords = True
        else:
            save_coords = False

        prepare_run(project_settings.RESULTS, opts.run)

        print _sum_txt(opts.db, len(resolver.status_queries), filt_txt)

        searcher = StarsSearcher(
            star_filters,
            save_path=os.path.join(project_settings.RESULTS, opts.run, "lcs"),
            save_lim=1,
            stat_file_path=os.path.join(project_settings.RESULTS, opts.run,
                                        "query_status.txt"),
            obth_method=opts.db,
            unfound_lim=UNFOUND_LIM,
            save_coords=save_coords)
        searcher.queryStars(queries)

    except Exception, e:
        # Top-level boundary: report the error to the user and exit with 2.
        print e, "\n\n"
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
# Esempio n. 4 (Example no. 4 — artifact from the web page this code was scraped from)
# 0
def main(project_settings, argv=None):
    """
    Command-line entry point: generate a query file (or a file of parameter
    combinations to tune) and save it via StatusResolver.save_query.

    Parameters
    ----------
    project_settings : object
        Settings object; TUN_PARAMS and QUERIES path attributes are read
        when '-f t' or '-f q' is given.

    argv : list, optional
        Command-line arguments; defaults to sys.argv[1:].

    Returns
    -------
        False when called without arguments, 2 on error, None on success.
    """

    program_info = """ABOUT
    
    The program creates query files or files of parameters to tune. Name of output
    file is specified by '-o' option. This file will be created in data/inputs/.
    
    Name of parameters are specified by '-p' and their ranges via '-r'. Format
    of ranges is: from_number:to_number:step_number (i.e. 1:10:2 means from
    1 to 10 with step size 2 --> 1,3,5..). It is not necessary to specify step,
    in that  case step will be taken as 1.
        
        Example:
        
        ./prepare_query.py -o TestQuery.txt -p starid -r 5:12:3 -p field -r 1:3 -p target -r lmc,smc
        
        --> generates
        
        #starid;target;field
        5;lmc;1
        5;smc;1
        5;lmc;2
        5;smc;2
        8;lmc;1
        8;smc;1
        8;lmc;2
        8;smc;2
        11;lmc;1
        11;smc;1
        11;lmc;2
        11;smc;2

        
        """

    program_name = os.path.basename(sys.argv[0])
    program_version = "v0.1"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version,
                                                 program_build_date)
    program_longdesc = "Run script without paramas to get info about the program."
    program_license = "Copyright 2016 Martin Vo"

    # Separator for range keys input text
    RANGES_SEPARATOR = ":"

    # Symbol separating explicitly enumerated values (e.g. 'lmc,smc').
    ENUM_SYMBOL = ","

    if argv is None:
        argv = sys.argv[1:]
    try:
        logging.info("Creating query file with these params {}".format(argv))
        # setup option parser
        parser = OptionParser(version=program_version_string,
                              epilog=program_longdesc,
                              description=program_license)
        parser.add_option(
            "-o",
            "--output",
            dest="output",
            default="my_query.txt",
            help="Name of the query file which will be created in data/inputs")
        parser.add_option("-p",
                          "--param",
                          dest="param",
                          action="append",
                          default=[],
                          help="Parameter name which will be generated")
        parser.add_option(
            "-r",
            "--range",
            dest="range",
            action="append",
            default=[],
            help=
            "Range of parameters separated by ':' - from_num:to_num:step_num.")
        parser.add_option("-d",
                          "--delim",
                          dest="delim",
                          default=";",
                          help="Delimiter for the output file")
        parser.add_option("-f",
                          "--folder",
                          dest="folder",
                          default=".",
                          help="Path where the query file will be saved")

        # process options
        opts, args = parser.parse_args(argv)

        if not len(argv):
            print(program_info, "\n")
            print("Run with '-h' in order to show params help\n")
            return False

        ranges = opts.range
        params = opts.param

        # Every '-p' parameter needs exactly one matching '-r' range.
        if not len(params) == len(ranges):
            raise QueryInputError(
                "Number of parameters and ranges have to be the same")

        # Expand each range expression into a list of values, one per parameter.
        x = []
        for i in range(len(params)):
            just_one = False
            enum = _enumeration(ranges[i], ENUM_SYMBOL)
            if not enum:
                parts = ranges[i].split(RANGES_SEPARATOR)

                n = len(parts)

                if n == 1:
                    # Single literal value, no expansion needed.
                    just_one = True

                elif n == 2:
                    # 'from:to' without an explicit step defaults to step 1.
                    step = 1

                elif n > 3:
                    raise Exception(
                        "There cannot be more then three separators %s" %
                        RANGES_SEPARATOR)

                else:
                    step = parts[2]

                if not just_one:
                    from_n = parts[0]
                    to_n = parts[1]
                    try:
                        # Integer range when all values parse as int...
                        x.append(list(range(int(from_n), int(to_n),
                                            int(step))))
                    except ValueError:
                        # ...otherwise fall back to a float range.
                        x.append(
                            np.arange(float(from_n), float(to_n), float(step)))
                else:
                    x.append(parts)

            else:
                # Explicit enumeration such as 'lmc,smc'.
                x.append(enum)

        query = get_combinations(params, *x)

        # Resolve the output folder: 't' and 'q' are shortcuts into the
        # project's tuning-parameters and queries directories.
        if opts.folder == "t":
            path = project_settings.TUN_PARAMS
        elif opts.folder == "q":
            path = project_settings.QUERIES
        else:
            path = opts.folder
        file_name = opts.output

        StatusResolver.save_query(query, file_name, path, opts.delim)

        print("\nDone.\nFile %s was saved into %s" % (file_name, path))

    except Exception as e:
        # Report the failure and exit with code 2. (A stray bare 'raise'
        # here previously made this reporting code unreachable.)
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
def main(project_settings, argv=None):
    '''
    Command line options.

    Entry point for tuning filter parameters: loads searched/contamination
    star samples, evaluates every parameter combination with the chosen
    deciders, and saves the best filter plus diagnostic plots and data.

    Parameters
    ----------
    project_settings : object
        Settings object; TUN_PARAMS, QUERIES, INP_LCS and FILTERS path
        attributes are read.

    argv : list, optional
        Command-line arguments; defaults to sys.argv[1:].

    Returns
    -------
        False when called without arguments, 2 on error, None on success.
    '''

    program_info = """ABOUT
    The program searches for the most optional parameters for given filters
    according to sample of searched and other train light curves.
    
    Getting stars
    -------------
        Stars can be obtained by three different ways resolved from query text
        according to format:
        
            1.QUERY:db_name:query_file_in_inputs_folder
                --> Remote database is queried (db key is name of connector class)
                
                    Example:
                        QUERY:OgleII:query_file.txt
                        
                    Note:
                        There is a overview of available database connectors
                        at the end (if it is launched from command line without
                        parameters)
            
            2.stars_folder_key:number or stars_folder_key%float_number or stars_folder_key
                --> Light curves from folder according to first key is loaded
                    (according to settings.STARS_PATH dictionary). All stars are
                    loaded if there is no number and ':', in case of integer after
                    ':' just this number of stars are loaded and if there are is a float
                    number after '%' this percentage number of all stars are loaded.
                    
                    Example:
                        quasars:10    or    be_stars%0.5    or    cepheids
                        
                    Note:
                        There is a overview of registered light curve locations 
                        at the end (if it is launched from command line without
                        parameters)
    
    Status file:
    ------------
        Parameters to try or queries can be specified in the file where first
        row starts with '#' and then there are names of parameters which can be used
        for finding the most optional parameters of a filter or as query for a database.
        Next rows consist of values to tune or queries. All columns are separated
        by ';' (can be changed in settings).
        
        Note:
            Example files can be find in data/inputs/examples
        
    Getting filter:
    ---------------
        Filter is loaded by name of the filter class in the filter package
        specified in settings.
            
            Note:
                All classes which inherits BaseFilter class located
                in the filters_imp package are considered as filters.
                
    Data folder hierarchy:
    -----------------------
        Next to src/ (source) folder there is a data/ folder where all data files
        are saved. All input/outputs are loaded/saved into a folder in data/.
        
        This behavior can be suppressed by entering word 'HERE:'
        (e.g. 'HERE:path_of_the_file_with_its_name'). It forces to take relative
        path from the directory of executing the script.
        
        There are 5 main folders:
          
            1. data/inputs/
                Location of files of queries and files fro tuning parameters
            
            2. data/light_curves/
                Location of light curve subfolders.
            
            3. data/star_filters/
                Location where tuned filters is saved (or can be loaded by
                filter_lcs script)
            
            4. data/tuning_logs/
                Location of output files from tuning - statistic for every combination
                of parameters, graphs (probability distribution with train objects
                and histograms).
            
            5. data/databases/
                Location of local db files (sqlite).
                
    Deciders:
    --------
        Deciders manage all learning and then recognizing of inspected objects.
        They can be loaded via name of their class.
    
        Note:
            There is a overview of implemented deciders at the end (if it is
            launched from command line without parameters)
            
            
    Running the program:
    -------------------
        By executing the script all inputs are verified. If everything is ok,
        all combinations of parameters are evaluated and the best is saved
        into data/stars_filters/ folder (if not specified otherwise).
        
        Records about tuning are saved into data/tuning_logs/ - plots of probability
        space and train objects, histograms for particular parameters and
        log file of statistic values about particular combinations.
        
        Note:
            Plot of probability space is created just in case of 2 prameters
            tuning.
            
      
    Examples
    ---------
        Example 1:
            File tuned_params.txt:
                #smooth_ratio
                0.2
                0.3
                0.5
                0.8
                0.9
                
            ./make_filter.py   -i tuned_params.txt
                                -f AbbeValueFilter
                                -s quasars:30
                                -c stars%0.5
                                -c cepheids
                                -o MyAbbeFilter
                                -d NeuronDecider
                
            In the example file above one row represents one combination of parameters (per column).
            Class name is AbbeValueFilter. Desired light curves are quasars (30
            of them are loaded) and they are trained on a "contamination sample"
            of ordinary stars (50 % of available light curves in the folder)
            and cepheids (all light curves in the folder).
            
        Example 2:
            File in/tuned_params_histvario.txt:
                #hist_days_per_bin;vario_days_per_bin;vario_alphabet_size;hist_alphabet_size      
                97;9;17;7
                80;8;16;7
            
            ./make_filter.py   -i tuned_params_histvario.txt
                                -f ComparingFilter
                                -s quasars:9
                                -c cepheids:7
                                -d GaussianNBDec
                                -o MyCompFilter
                                
            
            In the second example above there is a special case of tuning for ComparingFilter.
            In this case whole searched sample is not assigned as train sample, 
            but it's half (in this version, in future there will be more options)
            is taken as comparing sample - these stars will be compared with
            inspected stars.  
            
            Comparative filter is composed from subfilters which is resolved
            from file of tuning parameters. Subfilters which can be constructed
            from given parameters are used.
            
            Note:
                Subfilters specified how stars are compared (e.g. their histograms,
                shape of curves, varigram etc.).
                They are located in 'filters_impl' package (same as ordinary filters)
                and they are distinguished via heritage of 'ComparativeSubFilter'
                class.

                        
        """

    program_name = os.path.basename(sys.argv[0])
    program_version = "v0.2"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version,
                                                 program_build_date)
    program_longdesc = "Run script without paramas to get info about the program."
    program_license = "Copyright 2016 Martin Vo"

    # Fallback filter name used when '-n' is not given.
    DEF_FILT_NAME = "Unnamed"

    if argv is None:
        argv = sys.argv[1:]
    try:
        # setup option parser
        parser = OptionParser(version=program_version_string,
                              epilog=program_longdesc,
                              description=program_license)

        parser.add_option(
            "-i",
            "--input",
            dest="input",
            help=
            "Name of the file of tuning combinations (present in $PROJEC_DIR/inputs/tun_params)"
        )

        parser.add_option("-n",
                          "--name",
                          dest="filt_name",
                          help="Name of the filter")

        parser.add_option(
            "-f",
            "--descriptor",
            dest="descriptors",
            action="append",
            default=[],
            help="Descriptors (this key can be used multiple times)")

        parser.add_option(
            "-s",
            "--searched",
            dest="searched",
            action="append",
            default=[],
            help="Searched stars folder (present in $PROJEC_DIR/inp_lcs)")

        parser.add_option(
            "-c",
            "--contamination",
            dest="cont",
            action="append",
            default=[],
            help=
            "Contamination stars folder (present in $PROJEC_DIR/inputs/lcs)")

        parser.add_option(
            "-t",
            "--template",
            dest="template",
            action="append",
            default=[],
            help=
            "Template stars folder (present in $PROJEC_DIR/inputs/lcs) if comparative filters are used"
        )

        parser.add_option("-d",
                          "--decider",
                          dest="deciders",
                          default=[],
                          help="Decider for learning to recognize objects")

        parser.add_option("-p",
                          "--split",
                          dest="split_ratio",
                          default="3:1",
                          help="Split ratio for train-test sample")

        # process options
        opts, args = parser.parse_args(argv)

        # No arguments: print the program overview plus the available
        # connectors, descriptors and deciders, then bail out.
        if not len(argv):
            print program_info, "\n"
            print "Available databases:\n\t%s\n" % json.dumps(
                PackageReader().getClassesDict("connectors").keys(), indent=4)
            print "Available descriptors:\n\t%s\n" % json.dumps(
                PackageReader().getClassesDict("descriptors").keys(), indent=4)
            print "Available deciders:\n\t%s\n" % json.dumps(
                PackageReader().getClassesDict("deciders").keys(), indent=4)
            print "Run with '-h' in order to show params help\n"
            return False

        # -------    Core    ------
        # Resolve descriptor classes by matching class names against '-f' args.
        try:
            descriptors = [
                desc for desc in PackageReader().getClasses("descriptors")
                if desc.__name__ in opts.descriptors
            ]

        except KeyError:
            raise Exception(
                "There are no descriptor %s.\nAvailable filters: %s" %
                (opts.filt, PackageReader().getClassesDict("descriptors")))
        # Every requested descriptor name must have been found.
        if len(opts.descriptors) != len(descriptors):
            raise QueryInputError(
                "No all descriptors have been found. Got: %s\nFound: %s" %
                (opts.descriptors, descriptors))

        # ASCII banner framing the program title.
        header = "#" + " " * 40 + \
            "Light Curves Classifier - Make Filter" + " " * 30 + "#"
        print "\n\n\t" + "#" * len(header)
        print "\t#" + " " * (len(header) - 2) + "#"
        print "\t" + header
        print "\t#" + " " * (len(header) - 2) + "#"
        print "\t" + "#" * len(
            header) + "\nSelected descriptors: " + ", ".join(
                [d.__name__ for d in descriptors])
        inp = os.path.join(project_settings.TUN_PARAMS, opts.input)

        try:
            _tuned_params = StatusResolver(status_file_path=inp).getQueries()
            tuned_params = parse_tun_query(_tuned_params)
        except IOError:
            raise Exception(
                "File of parameters combinations was not found:\n%s" % inp)

        if not tuned_params:
            raise QueryInputError("Empty parameters file")

        # TODO: Add check that tuned_paramters are these params needed to
        # construct filter.
        # Resolve decider classes the same way as descriptors above.
        try:
            deciders = [
                desc for desc in PackageReader().getClasses("deciders")
                if desc.__name__ in opts.deciders
            ]
        except KeyError:
            raise Exception(
                "Unknown decider %s\nAvailable deciders: %s" %
                (opts.deciders, PackageReader().getClasses("deciders")))

        print "Selected deciders: " + ", ".join([d.__name__ for d in deciders])
        print "\nLoading stars..."
        searched = getStars(opts.searched,
                            project_settings.INP_LCS,
                            query_path=project_settings.QUERIES,
                            progb_txt="Querying searched stars: ")
        others = getStars(opts.cont,
                          project_settings.INP_LCS,
                          query_path=project_settings.QUERIES,
                          progb_txt="Querying contamination stars: ")
        print "Sample of %i searched objects and %i of contamination objects was loaded" % (
            len(searched), len(others))

        # Comparative descriptors additionally need template ("comparing")
        # stars, passed through static_params.
        # NOTE(review): temp_stars is only assigned when '-t' was given; a
        # ComparativeBase descriptor without '-t' raises NameError here.
        static_params = {}
        if opts.template:
            temp_stars = getStars(opts.template,
                                  project_settings.INP_LCS,
                                  query_path=project_settings.QUERIES)
        for desc in descriptors:
            if issubclass(desc, ComparativeBase):
                static_params[desc.__name__] = {}
                static_params[desc.__name__]["comp_stars"] = temp_stars

        # Normalize the filter name: default when missing, strip extension.
        filt_name = opts.filt_name
        if not filt_name:
            filt_name = DEF_FILT_NAME
        if "." in filt_name:
            filt_name = filt_name[:filt_name.rfind(".")]

        filter_path = os.path.join(project_settings.FILTERS, filt_name)

        # Touching the key registers filt_name in the defaultdict tree;
        # rec() then presumably mirrors it as a folder structure — TODO confirm.
        d = tree()
        d[filt_name]

        rec(d, project_settings.FILTERS)

        # Output locations for ROC plot/data and the statistics file.
        save_params = {
            "roc_plot_path": filter_path,
            "roc_plot_name": "ROC_plot.png",
            "roc_plot_title": filt_name,
            "roc_data_path": filter_path,
            "roc_data_name": "ROC_data.dat",
            "stats_path": filter_path,
            "stats_name": "stats.dat"
        }

        try:
            ratios = [int(sp) for sp in opts.split_ratio.split(":")]
        except ValueError:
            raise ValueError(
                "Ratios have to be numbers separated by ':'. Got:\n%s" %
                opts.split_ratio)

        # NOTE(review): under Python 2 this is integer division — e.g. the
        # default "3:1" yields 3 / 4 == 0, not 0.75. Verify intended ratio.
        es = ParamsEstimator(searched=searched,
                             others=others,
                             descriptors=descriptors,
                             deciders=deciders,
                             tuned_params=tuned_params,
                             static_params=static_params,
                             split_ratio=ratios[0] / sum(ratios[:2]))

        print "\nTuning is about to start. There are %i combinations to try" % len(
            tuned_params)

        star_filter, _, _ = es.fit(_getPrecision, save_params=save_params)

        FiltersSerializer(filt_name + ".filter",
                          filter_path).saveFilter(star_filter)

        plotProbabSpace(star_filter,
                        opt="save",
                        path=filter_path,
                        file_name="ProbabSpace.png",
                        title="".join([d.__name__ for d in deciders]),
                        searched_coords=star_filter.searched_coords,
                        contaminatiom_coords=star_filter.others_coords)
        # Collect one label per coordinate axis (a descriptor may contribute
        # several labels when its LABEL is iterable).
        desc_labels = []
        for desc in star_filter.descriptors:
            if hasattr(desc.LABEL, "__iter__"):
                desc_labels += desc.LABEL
            else:
                desc_labels.append(desc.LABEL)

        plotHist(star_filter.searched_coords,
                 star_filter.others_coords,
                 labels=desc_labels,
                 save_path=filter_path,
                 file_name="CoordsDistribution")

        # NOTE(review): 'header' is reused here for the savetxt header, so the
        # closing banner below is sized by the label string, not the title bar.
        header = "\t".join(desc_labels)
        np.savetxt(os.path.join(project_settings.FILTERS, filt_name,
                                "searched_coords.dat"),
                   star_filter.searched_coords,
                   "%.3f",
                   header=header)
        np.savetxt(os.path.join(project_settings.FILTERS, filt_name,
                                "contam_coords.dat"),
                   star_filter.others_coords,
                   "%.3f",
                   header=header)

        print "\nIt is done.\n\t" + "#" * len(header)

    except Exception, e:
        # 'debug' is presumably a module-level flag — re-raise for a full
        # traceback when it is set; otherwise report and exit with 2.
        if debug:
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2