예제 #1
0
def build_database(catalogue, foldername, indices=(), extra_function=None):
    """Build the local database tables for one catalogue.

    For every data file described by ``parse_readme(foldername)`` the matching
    table is dropped, recreated and filled through ``parse_datafile``. The
    requested indices are added to that table right after it has been filled.

    Args:
        catalogue: name of the catalogue; passed on to ``get_files``.
        foldername: folder of the catalogue; also used as table name prefix.
        indices: column names to index in every table that contains a column
            with exactly that name.
        extra_function: optional zero-argument callable that is invoked once
            after all tables have been built.
    """
    # Single-argument print(...) calls produce the same output under
    # Python 2 and Python 3; print("") stands in for the bare `print`.
    print("")
    print("Building database for {} ({})".format(catalogue, foldername))
    t1 = time.time()
    files = get_files(catalogue, foldername)

    datadicts = parse_readme(foldername)
    db = SkyMapDatabase()
    for f, dds in datadicts.items():
        # The table is named after the folder and the file name up to the
        # first dot.
        table = "{}_{}".format(foldername, f.split(".")[0])
        db.drop_table(table)

        columns = []
        lc_columns = set()
        datatypes = []
        for dd in dds:
            label = dd["label"]

            # Column names are compared case-insensitively; a label that
            # clashes with an earlier one gets the first free "_<n>" suffix.
            # The candidate is always rebuilt from the original label so the
            # suffix stays well-formed once n needs more than one digit.
            c = label
            n = 0
            while c.lower() in lc_columns:
                n += 1
                c = "{}_{}".format(label, n)

            lc_columns.add(c.lower())
            columns.append(c)
            datatypes.append(dd['format'])

        db.create_table(table, columns, datatypes)

        # Every downloaded file whose name starts with the described file
        # name is loaded into the same table.
        real_files = [fn for fn in files if fn.startswith(f)]
        for real_file in real_files:
            parse_datafile(db, foldername, real_file, table, dds, columns)
        for ind in indices:
            if ind in columns:
                db.add_index(table, ind)

    t2 = time.time()
    print("")
    print("")
    print("Time: {} s".format(t2 - t1))

    if extra_function:
        extra_function()
예제 #2
0
def split_tyc():
    """Split the ``TYC`` column of ``hiptyc_tyc_main`` into three INT columns.

    Adds the columns ``TYC1``, ``TYC2`` and ``TYC3``, (re)creates the stored
    function ``SPLIT_TYC`` that returns the ``pos``-th space-separated token
    of a string as an integer, fills the new columns with it, and finally
    indexes the three columns both individually and together.
    """
    table_name = "hiptyc_tyc_main"
    part_columns = ("TYC1", "TYC2", "TYC3")

    add_columns_sql = """
        ALTER TABLE hiptyc_tyc_main
        ADD COLUMN `TYC1` INT AFTER `TYC`,
        ADD COLUMN `TYC2` INT AFTER `TYC1`,
        ADD COLUMN `TYC3` INT AFTER `TYC2`
    """

    drop_function_sql = "DROP FUNCTION IF EXISTS SPLIT_TYC"

    # The stored function trims the input, collapses runs of spaces to a
    # single space and then cuts out the token at position `pos`.
    create_function_sql = """
        CREATE FUNCTION SPLIT_TYC(str VARCHAR(255), pos INT) RETURNS INT
        BEGIN
            SET str = TRIM(str);
            WHILE INSTR(str, '  ') > 0 DO
                SET str = REPLACE(str, '  ', ' ');
            END WHILE;
            SET str = REPLACE(
                SUBSTRING(
                    SUBSTRING_INDEX(str, ' ', pos), 
                    CHAR_LENGTH(
                        SUBSTRING_INDEX(str, ' ', pos - 1)
                    ) + 1
                )
                , ' ', ''
            );
            RETURN CAST(str AS UNSIGNED);
        END;
    """

    fill_columns_sql = """
        UPDATE hiptyc_tyc_main
        SET 
          TYC1=SPLIT_TYC(TYC, 1), 
          TYC2=SPLIT_TYC(TYC, 2), 
          TYC3=SPLIT_TYC(TYC, 3)
    """

    db = SkyMapDatabase()
    statements = (
        add_columns_sql,
        drop_function_sql,
        create_function_sql,
        fill_columns_sql,
    )
    for statement in statements:
        db.commit_query(statement)

    for column in part_columns:
        db.add_index(table_name, column)

    # NOTE(review): "TYC" is presumably the name given to the combined
    # unique index - confirm against SkyMapDatabase.add_multiple_column_index.
    db.add_multiple_column_index(table_name, part_columns, "TYC", unique=True)
예제 #3
0
def split_tyc():
    """Split the ``TYC`` column of ``hiptyc_tyc_main`` into three INT columns.

    Adds the columns ``TYC1``, ``TYC2`` and ``TYC3`` and fills them from
    fixed character ranges of ``TYC`` (``substr`` start 1 length 4, start 5
    length 6, start 11 length 2), each cast to an unsigned integer. The three
    columns are then indexed both individually and together.
    """
    table_name = "hiptyc_tyc_main"
    part_columns = ("TYC1", "TYC2", "TYC3")

    add_columns_sql = """
        ALTER TABLE hiptyc_tyc_main
        ADD COLUMN `TYC1` INT AFTER `TYC`,
        ADD COLUMN `TYC2` INT AFTER `TYC1`,
        ADD COLUMN `TYC3` INT AFTER `TYC2`
    """
    fill_columns_sql = """
        UPDATE hiptyc_tyc_main
        SET TYC1=CAST(substr(TYC, 1, 4) AS UNSIGNED), TYC2=CAST(substr(hiptyc_tyc_main.TYC, 5, 6) AS UNSIGNED), TYC3=CAST(substr(hiptyc_tyc_main.TYC, 11, 2) AS UNSIGNED)
    """

    db = SkyMapDatabase()
    for statement in (add_columns_sql, fill_columns_sql):
        db.commit_query(statement)

    for column in part_columns:
        db.add_index(table_name, column)

    # NOTE(review): "TYC" is presumably the name given to the combined
    # unique index - confirm against SkyMapDatabase.add_multiple_column_index.
    db.add_multiple_column_index(table_name, part_columns, "TYC", unique=True)
예제 #4
0
def build_database(catalogue, foldername, indices=(), extra_function=None):
    """Download the data files of a catalogue and build local tables for it.

    For every data file described by ``parse_readme(foldername)`` the matching
    table is dropped, recreated and filled through ``parse_datafile``. Once
    all tables are filled, the requested indices are added.

    Args:
        catalogue (str): the name of the catalogue
        foldername (str): the folder where the data is saved; also used as
            the table name prefix
        indices (list): the columns to generate indices for; a column is
            indexed in every table that has a column of exactly that name
        extra_function (function): an optional zero-argument function to call
            after the database is built
    """
    print()
    print(f"Building database for {catalogue} ({foldername})")
    t1 = time.time()

    files = download_files(catalogue, foldername)
    datadicts = parse_readme(foldername)
    db = SkyMapDatabase()

    column_name_dict = {}

    for filename, coldefs in datadicts.items():
        datatypes = [coldef['format'] for coldef in coldefs]

        # SQL is case insensitive, and Vizier sometimes has column names in
        # the same file that have equivalent names. A label that clashes with
        # an earlier one gets the first free "_<n>" suffix. The candidate is
        # always rebuilt from the original label, so the suffix stays
        # well-formed once n needs more than one digit.
        column_names = []
        taken_lowercase = set()
        for coldef in coldefs:
            label = coldef["label"]
            column_name = label
            n = 0
            while column_name.lower() in taken_lowercase:
                n += 1
                column_name = f"{label}_{n}"

            taken_lowercase.add(column_name.lower())
            column_names.append(column_name)

        # The table is named after the folder and the file name up to the
        # first dot.
        table = "{}_{}".format(foldername, filename.split(".")[0])
        column_name_dict[table] = column_names

        # Clear the database table
        db.drop_table(table)
        db.create_table(table, column_names, datatypes)

        # For large catalogs, the data can be spread over multiple files, so
        # loop over all files whose name starts with the described file name
        real_files = [fn for fn in files if fn.startswith(filename)]
        for real_file in real_files:
            parse_datafile(db, foldername, real_file, table, coldefs,
                           column_names)

    # Add indices only after every table has been filled
    for table, column_names in column_name_dict.items():
        for ind in indices:
            if ind in column_names:
                db.add_index(table, ind)

    t2 = time.time()
    print()
    print()
    print(f"Time: {t2-t1} s")

    if extra_function:
        extra_function()