예제 #1
0
    def update_missing_sql_fields(self):
        """Fill in ``field_max``/``field_min`` for indexed rows that lack them.

        Selects the ``valid_date`` of every fingerprint-table row that has a
        ``file_id`` but a NULL ``field_max`` or ``field_min``, loads the values
        for that date through ``self.array`` and stores their maximum and
        minimum.  Each row is updated inside its own ``cdsdb.begin()`` block.
        When at least one date was processed, the number of dates is printed.
        """
        find_incomplete = text("""
        SELECT valid_date FROM {table}
        WHERE file_id IS NOT NULL
        AND (field_max IS NULL OR field_min IS NULL)
        """.format(table=self.fingerprint_table))

        store_range = text("""
        UPDATE {table}
        SET field_max    = :field_max,
            field_min    = :field_min
        WHERE valid_date = :valid_date
        """.format(table=self.fingerprint_table))

        # Collect all dates first so the SELECT block is closed before the
        # per-date updates start.
        with cdsdb.begin() as connection:
            rows = connection.execute(find_incomplete)
            pending = [cdsdb.sql_to_datetime(row[0]) for row in rows]

        for when in pending:
            data = self.array(when)

            with cdsdb.begin() as connection:
                connection.execute(store_range,
                                   field_max=np.amax(data),
                                   field_min=np.amin(data),
                                   valid_date=when)

        if pending:
            print('update_missing_sql_fields', len(pending))
예제 #2
0
    def sql_table(self):
        """Return the name of the ``methods`` table, creating it on first use.

        The table holds one row per (name, param, domain, dataset) with the
        ``alpha``, ``score`` and ``seed`` columns.

        The name is cached in ``self._sql_table`` only after the
        ``CREATE TABLE IF NOT EXISTS`` statement has run without raising, so a
        failed creation is retried on the next access instead of leaving a
        cached name for a table that was never created.
        """
        if self._sql_table is None:
            table = 'methods'

            create = text("""
                CREATE TABLE IF NOT EXISTS {table} (
                      name    VARCHAR(255),
                      param   VARCHAR(255),
                      domain  VARCHAR(255),
                      dataset VARCHAR(255),
                      alpha   REAL,
                      score   REAL,
                      seed    REAL,
                      CONSTRAINT {table}_inx UNIQUE (name, param, domain, dataset)
                );
                """.format(table=table))

            with cdsdb.begin() as connection:
                connection.execute(create)

            # Publish the cached name last: any exception above leaves the
            # cache empty so the next access tries again.
            self._sql_table = table

        return self._sql_table
예제 #3
0
    def seed(self, valid_dates):
        """Register every date of ``valid_dates`` in the fingerprint table.

        Each date is inserted as a row that only sets ``valid_date``; the
        ``ON CONFLICT DO NOTHING`` clause skips conflicting rows.  All inserts
        are issued inside a single ``cdsdb.begin()`` block.
        """
        add_date = text("""
            INSERT INTO {table} (valid_date) VALUES (:valid_date)
            ON CONFLICT DO NOTHING;
        """.format(table=self.fingerprint_table))

        with cdsdb.begin() as connection:
            for when in valid_dates:
                connection.execute(add_date, valid_date=when)
예제 #4
0
 def mean(self):
     """Return the ``mean`` recorded in the ``alpha`` table for this field.

     The row is selected by ``param``, ``domain`` and ``dataset`` on the first
     call and the value is cached in ``self._mean``.  When the query yields no
     value, ``0.0`` is cached and returned instead.
     """
     if self._mean is not None:
         return self._mean

     query = text(
         "SELECT mean FROM alpha where param=:param and domain=:domain and dataset=:dataset"
     )
     with cdsdb.begin() as connection:
         found = connection.execute(query,
                                    param=self.param,
                                    domain=self.domain,
                                    dataset=self.dataset).scalar()
         self._mean = 0.0 if found is None else found
     return self._mean
예제 #5
0
    def fingerprints(self):
        """Return the stored fingerprints keyed by valid date.

        Only rows whose ``fingerprint_r``, ``fingerprint_s`` and ``file_id``
        are all non-NULL are considered.  Each key is the row's ``valid_date``
        passed through ``cdsdb.sql_to_datetime`` and each value is the tuple
        ``(fingerprint_r, fingerprint_s)``.
        """
        query = text("""
            SELECT valid_date, fingerprint_r, fingerprint_s FROM {table}
            WHERE fingerprint_r IS NOT NULL
            AND fingerprint_s IS NOT NULL
            AND file_id IS NOT NULL
            """.format(table=self.fingerprint_table))

        with cdsdb.begin() as connection:
            return {
                cdsdb.sql_to_datetime(row[0]): (row[1], row[2])
                for row in connection.execute(query)
            }
예제 #6
0
    def sql_dates(self):
        """Return the valid dates that have a file, in ascending order.

        The dates come from the fingerprint table (rows with a non-NULL
        ``file_id``, ordered by ``valid_date``) and are converted with
        ``cdsdb.sql_to_datetime``.  The list is computed once and cached in
        ``self._sql_dates``.
        """
        if self._sql_dates is not None:
            return self._sql_dates

        query = text("""
            SELECT valid_date FROM {table}
            WHERE file_id IS NOT NULL
            ORDER BY valid_date
            """.format(table=self.fingerprint_table))

        with cdsdb.begin() as connection:
            self._sql_dates = [
                cdsdb.sql_to_datetime(row[0])
                for row in connection.execute(query)
            ]
        return self._sql_dates
예제 #7
0
 def maximum(self):
     """Return the ``maximum`` recorded in the ``alpha`` table for this field.

     The row is selected by ``param``, ``domain`` and ``dataset`` on the first
     call and the value is cached in ``self._maximum``.  When the query yields
     no value, ``0.0`` is cached and returned instead.
     """
     if self._maximum is not None:
         return self._maximum

     query = text(
         "SELECT maximum FROM alpha where param=:param and domain=:domain and dataset=:dataset"
     )
     with cdsdb.begin() as connection:
         found = connection.execute(query,
                                    param=self.param,
                                    domain=self.domain,
                                    dataset=self.dataset).scalar()
         self._maximum = 0.0 if found is None else found
     return self._maximum
예제 #8
0
 def smoothness2_average_no_constants(self):
     """Return ``smoothness2_average_no_constants`` from the ``alpha`` table.

     The row is selected by ``param``, ``domain`` and ``dataset`` on the first
     call and the value is cached in
     ``self._smoothness2_average_no_constants``.  When the query yields no
     value, ``0.0`` is cached and returned instead.
     """
     if self._smoothness2_average_no_constants is not None:
         return self._smoothness2_average_no_constants

     query = text(
         "SELECT smoothness2_average_no_constants FROM alpha where param=:param and domain=:domain and dataset=:dataset"
     )
     with cdsdb.begin() as connection:
         found = connection.execute(query,
                                    param=self.param,
                                    domain=self.domain,
                                    dataset=self.dataset).scalar()
         self._smoothness2_average_no_constants = (
             0.0 if found is None else found)
     return self._smoothness2_average_no_constants
예제 #9
0
    def sample(self, date=None):
        """Return a GRIB message for ``date``, or any available sample.

        With a ``date``, the result is ``self.grib(date)``.  Without one, the
        rows produced by ``self.SELECT_FIRST_SAMPLE`` are scanned as
        ``(path, offset)`` pairs and the first one whose path exists on disk
        is opened with ``GribFile(path).at_offset(offset)``.  If no such row
        exists, ``self.grib`` is called for 2000-01-01.
        """
        if date is None:
            with cdsdb.begin() as connection:
                candidates = connection.execute(self.SELECT_FIRST_SAMPLE,
                                                valid_date=date)
                for location, position in candidates:
                    if os.path.exists(location):
                        return GribFile(location).at_offset(position)

            # Nothing usable on disk: fall back to a fixed reference date.
            return self.grib(datetime.date(2000, 1, 1))

        return self.grib(date)
예제 #10
0
 def max_fingerprint_distance(self):
     """Return ``max_fingerprint_distance`` from the ``alpha`` table.

     The row is selected by ``param``, ``domain`` and ``dataset`` on the first
     call and the value is cached in ``self._max_fingerprint_distance``.  When
     the query yields no value, ``0.0`` is cached and returned instead.
     """
     if self._max_fingerprint_distance is not None:
         return self._max_fingerprint_distance

     query = text(
         "SELECT max_fingerprint_distance FROM alpha where param=:param and domain=:domain and dataset=:dataset"
     )
     with cdsdb.begin() as connection:
         found = connection.execute(query,
                                    param=self.param,
                                    domain=self.domain,
                                    dataset=self.dataset).scalar()
         self._max_fingerprint_distance = 0.0 if found is None else found
     return self._max_fingerprint_distance
예제 #11
0
 def stddev(self):
     """Return the ``stddev`` recorded in the ``alpha`` table for this field.

     The row is selected by ``param``, ``domain`` and ``dataset`` on the first
     call and the value is cached in ``self._stddev``.  When the query yields
     no value, ``0.0`` is cached and returned instead.
     """
     if self._stddev is not None:
         return self._stddev

     query = text(
         "SELECT stddev FROM alpha where param=:param and domain=:domain and dataset=:dataset"
     )
     with cdsdb.begin() as connection:
         found = connection.execute(query,
                                    param=self.param,
                                    domain=self.domain,
                                    dataset=self.dataset).scalar()
         self._stddev = 0.0 if found is None else found
     return self._stddev
예제 #12
0
    def alpha(self, alpha):
        """Store ``alpha`` for this method in the table named by ``self.sql_table``.

        The row is identified by (name, param, domain, dataset); it is
        inserted, or its ``alpha`` column is updated when the row already
        exists.
        """
        upsert = text("""
            INSERT INTO {table} (name, param, domain, dataset, alpha)
            VALUES (:name, :param, :domain, :dataset, :alpha)
            ON CONFLICT (name, param, domain, dataset)
            DO UPDATE SET alpha=:alpha
            """.format(table=self.sql_table))

        with cdsdb.begin() as connection:
            bindings = {
                'name': self.name,
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'alpha': alpha,
            }
            connection.execute(upsert, **bindings)
예제 #13
0
    def score(self, score):
        """Store ``score`` for this method in the table named by ``self.sql_table``.

        The row is identified by (name, param, domain, dataset); it is
        inserted, or its ``score`` column is updated when the row already
        exists.
        """
        upsert = text("""
            INSERT INTO {table} (name, param, domain, dataset, score)
            VALUES (:name, :param, :domain, :dataset, :score)
            ON CONFLICT (name, param, domain, dataset)
            DO UPDATE SET score=:score
            """.format(table=self.sql_table))

        with cdsdb.begin() as connection:
            bindings = {
                'name': self.name,
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'score': score,
            }
            connection.execute(upsert, **bindings)
예제 #14
0
    def seed(self, seed):
        """Store ``seed`` for this method in the table named by ``self.sql_table``.

        The row is identified by (name, param, domain, dataset); it is
        inserted, or its ``seed`` column is updated when the row already
        exists.
        """
        upsert = text("""
            INSERT INTO {table} (name, param, domain, dataset, seed)
            VALUES (:name, :param, :domain, :dataset, :seed)
            ON CONFLICT (name, param, domain, dataset)
            DO UPDATE SET seed=:seed
            """.format(table=self.sql_table))

        with cdsdb.begin() as connection:
            bindings = {
                'name': self.name,
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'seed': seed,
            }
            connection.execute(upsert, **bindings)
예제 #15
0
 def alpha(self):
     """Return the ``alpha`` stored for this method.

     The row is selected by ``name``, ``param``, ``domain`` and ``dataset``
     from the table named by ``self.sql_table``.  The value is not cached;
     ``0.5`` is returned when the query yields no value.
     """
     query = text("""
           SELECT alpha FROM {table}
           where name=:name
           and param=:param
           and domain=:domain
           and dataset=:dataset""".format(table=self.sql_table))
     with cdsdb.begin() as connection:
         stored = connection.execute(query,
                                     name=self.name,
                                     param=self.param,
                                     domain=self.domain,
                                     dataset=self.dataset).scalar()
     return 0.5 if stored is None else stored
예제 #16
0
    def smoothness2_average(self, smoothness2_average):
        """Cache ``smoothness2_average`` and persist it in the ``alpha`` table.

        The value is stored on ``self._smoothness2_average`` first, then
        upserted into the row identified by (param, domain, dataset).
        Returns the cached value.
        """
        self._smoothness2_average = smoothness2_average
        upsert = text("""
            INSERT INTO alpha (param, domain, dataset, smoothness2_average)
            VALUES (:param, :domain, :dataset, :smoothness2_average)
            ON CONFLICT (param, domain, dataset)
            DO UPDATE SET smoothness2_average=:smoothness2_average
            """)

        with cdsdb.begin() as connection:
            bindings = {
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'smoothness2_average': smoothness2_average,
            }
            connection.execute(upsert, **bindings)
        return self._smoothness2_average
예제 #17
0
    def file_table(self):
        """Return the name of this field's file table, creating it on first use.

        The name is ``file_<param>_<domain>_<dataset>``.  The table has an
        ``id`` column (typed by ``cdsdb.sql_autoincrement``) and a unique,
        non-NULL ``path`` column.

        The name is cached in ``self._file_table`` only after the
        ``CREATE TABLE IF NOT EXISTS`` statement has run without raising, so a
        failed creation is retried on the next access instead of leaving a
        cached name for a table that was never created.
        """
        if self._file_table is None:
            table = "file_{param}_{domain}_{dataset}".format(
                param=self.param, domain=self.domain, dataset=self.dataset)

            create = text("""
                CREATE TABLE IF NOT EXISTS {table} (
                    id   {increment},
                    path TEXT UNIQUE NOT NULL --CHECK (path <> '')
                );
                """.format(table=table,
                           increment=cdsdb.sql_autoincrement))
            with cdsdb.begin() as connection:
                connection.execute(create)

            # Publish the cached name last: any exception above leaves the
            # cache empty so the next access tries again.
            self._file_table = table

        return self._file_table
예제 #18
0
    def maximum(self, maximum):
        """Cache ``maximum`` and persist it in the ``alpha`` table.

        The value is stored on ``self._maximum`` first, then upserted into
        the row identified by (param, domain, dataset).  Returns the cached
        value.
        """
        self._maximum = maximum
        upsert = text("""
            INSERT INTO alpha (param, domain, dataset, maximum)
            VALUES (:param, :domain, :dataset, :maximum)
            ON CONFLICT (param, domain, dataset)
            DO UPDATE SET maximum=:maximum
            """)

        with cdsdb.begin() as connection:
            bindings = {
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'maximum': maximum,
            }
            connection.execute(upsert, **bindings)
        return self._maximum
예제 #19
0
    def stddev(self, stddev):
        """Cache ``stddev`` and persist it in the ``alpha`` table.

        The value is stored on ``self._stddev`` first, then upserted into the
        row identified by (param, domain, dataset).  Returns the cached
        value.
        """
        self._stddev = stddev
        upsert = text("""
            INSERT INTO alpha (param, domain, dataset, stddev)
            VALUES (:param, :domain, :dataset, :stddev)
            ON CONFLICT (param, domain, dataset)
            DO UPDATE SET stddev=:stddev
            """)

        with cdsdb.begin() as connection:
            bindings = {
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'stddev': stddev,
            }
            connection.execute(upsert, **bindings)
        return self._stddev
예제 #20
0
    def mean(self, mean):
        """Cache ``mean`` and persist it in the ``alpha`` table.

        The value is stored on ``self._mean`` first, then upserted into the
        row identified by (param, domain, dataset).  Returns the cached
        value.
        """
        self._mean = mean
        upsert = text("""
            INSERT INTO alpha (param, domain, dataset, mean)
            VALUES (:param, :domain, :dataset, :mean)
            ON CONFLICT (param, domain, dataset)
            DO UPDATE SET mean=:mean
            """)

        with cdsdb.begin() as connection:
            bindings = {
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'mean': mean,
            }
            connection.execute(upsert, **bindings)
        return self._mean
예제 #21
0
    def max_fingerprint_distance(self, max_fingerprint_distance):
        """Cache ``max_fingerprint_distance`` and persist it in the ``alpha`` table.

        The value is stored on ``self._max_fingerprint_distance`` first, then
        upserted into the row identified by (param, domain, dataset).
        Returns the cached value.
        """
        self._max_fingerprint_distance = max_fingerprint_distance
        upsert = text("""
            INSERT INTO alpha (param, domain, dataset, max_fingerprint_distance)
            VALUES (:param, :domain, :dataset, :max_fingerprint_distance)
            ON CONFLICT (param, domain, dataset)
            DO UPDATE SET max_fingerprint_distance=:max_fingerprint_distance
            """)

        with cdsdb.begin() as connection:
            bindings = {
                'param': self.param,
                'domain': self.domain,
                'dataset': self.dataset,
                'max_fingerprint_distance': max_fingerprint_distance,
            }
            connection.execute(upsert, **bindings)
        return self._max_fingerprint_distance
예제 #22
0
    def grib_path_offset(self, date):
        """Return ``(path, offset)`` locating the GRIB message for ``date``.

        ``date`` is first converted with ``cdsdb.sql_to_datetime``.  The rows
        of ``self.SELECT_SAMPLE`` are scanned as ``(path, offset)`` pairs and
        the first pair whose path exists on disk is returned; paths that do
        not exist are reported on stdout.  When no row qualifies, a
        'Not found' line is printed and the result of ``self.retrieve(date)``
        is returned instead.
        """
        with cdsdb.begin() as connection:
            date = cdsdb.sql_to_datetime(date)
            rows = connection.execute(self.SELECT_SAMPLE, valid_date=date)

            for location, position in rows:
                if not os.path.exists(location):
                    print(location, 'does not exists')
                    continue
                return (location, position)

        print('Not found', self, date)
        return self.retrieve(date)
예제 #23
0
    def fingerprint_table(self):
        """Return the name of this field's fingerprint table, creating it on first use.

        The name is ``fingerprint_<param>_<domain>_<dataset>``.  Each row is
        keyed by a unique ``valid_date`` and carries the fingerprint
        (``fingerprint_s``, ``fingerprint_r``), the field range
        (``field_min``, ``field_max``), the location of the field
        (``file_id``, ``position``) and an ``updated`` timestamp defaulting to
        ``cdsdb.sql_now``.

        The name is cached in ``self._fingerprint_table`` only after the
        ``CREATE TABLE IF NOT EXISTS`` statement has run without raising, so a
        failed creation is retried on the next access instead of leaving a
        cached name for a table that was never created.
        """
        if self._fingerprint_table is None:
            table = "fingerprint_{param}_{domain}_{dataset}".format(
                param=self.param, domain=self.domain, dataset=self.dataset)

            create = text("""
                CREATE TABLE IF NOT EXISTS {table} (
                    valid_date        TIMESTAMP NOT NULL UNIQUE,

                    -- Fingerprint
                    fingerprint_s          INTEGER , -- should be smallint, but smallint is signed
                    fingerprint_r          REAL    , -- mean

                    field_min REAL,
                    field_max REAL,
                    -- FILE

                    file_id           INTEGER, -- REFERENCES files(id),
                    position          BIGINT,

                    -- Updated
                    updated           TIMESTAMP NOT NULL DEFAULT ({now})
                );
                """.format(table=table, now=cdsdb.sql_now))
            with cdsdb.begin() as connection:
                connection.execute(create)

            # NOTE(review): CREATE TABLE IF NOT EXISTS does not alter an
            # existing table; presumably tables created before field_min /
            # field_max were introduced need those REAL columns added by hand
            # (ALTER TABLE ... ADD COLUMN) -- confirm against deployed schemas.

            # Publish the cached name last: any exception above leaves the
            # cache empty so the next access tries again.
            self._fingerprint_table = table

        return self._fingerprint_table
예제 #24
0
def mars_request_for_missing_fields(args):
    """Retrieve and index fields that have no file recorded in the database.

    Steps, in order:

    1. Every row of the field's file table whose path no longer exists on
       disk is reported; for at most 500 of them the fingerprint rows get
       ``file_id`` reset to NULL and the file row is deleted.
    2. Up to 366 valid dates without a file are selected, ordered by
       ``updated``, and their distinct dates and times are handed to the
       retriever, which is executed with ``args.target``.
    3. The target is indexed with ``Field.index_grib_file`` when it exists;
       otherwise a message is printed.
    4. ``updated`` is refreshed for all selected valid dates.

    ``args`` must carry truthy ``param`` and ``target`` attributes;
    ``args.target`` is replaced by its real path.
    """
    assert args.param
    assert args.target

    field = Field(args.param)

    list_files = text("""
    select * from {table} ;
    """.format(table=field.file_table))

    detach_file = text("""
    update {table} set file_id=null where file_id=:file_id;
    """.format(table=field.fingerprint_table))

    drop_file = text("""
    delete from {table} where id=:file_id;
    """.format(table=field.file_table))

    # Column 0 of the file table is the id, column 1 the path.
    gone = set()
    with cdsdb.begin() as connection:
        for row in connection.execute(list_files):
            if os.path.exists(row[1]):
                continue
            print("MISSING file %s" % (row[1], ))
            gone.add(row[0])

    if gone:
        print("CLEANUP MISSING:", len(gone))
        # Bound the amount of cleanup done by a single call.
        gone = list(gone)[:500]
        with cdsdb.begin() as connection:
            for file_id in gone:
                connection.execute(detach_file, file_id=file_id)
                connection.execute(drop_file, file_id=file_id)
        print("CLEANUP MISSING:", len(gone))

    args.target = os.path.realpath(args.target)

    select_pending = text("""
    select valid_date from {table} where file_id is null
    order by updated limit :limit;
    """.format(table=field.fingerprint_table))

    retriever = Param.lookup(field.param).retriever(cdsdb)
    retriever.domain(Domain.lookup(field.domain))
    retriever.dataset(Dataset.lookup(field.dataset))

    with cdsdb.begin() as connection:
        valid_dates = [
            row[0] for row in connection.execute(select_pending, limit=366)
        ]
        dates = {cdsdb.sql_date_to_yyyymmdd(when) for when in valid_dates}
        times = {cdsdb.sql_date_to_hhmm(when) for when in valid_dates}

    retriever.dates(list(dates))
    retriever.times(list(times))

    retriever.execute(args.target)

    if os.path.exists(args.target):
        field.index_grib_file(args.target)
    else:
        print("%s does not exists, skipped" % (args.target, ))

    insql, vals = cdsdb.sql_in_statement('valid_dates', valid_dates)

    # Touch every requested date, whether or not it was retrieved, so the
    # "order by updated" selection above moves on to other dates next time.
    touch = text("""
            update {table}
              set updated={now}
            where valid_date in {insql};
        """.format(table=field.fingerprint_table, now=cdsdb.sql_now, insql=insql))
    with cdsdb.begin() as connection:
        connection.execute(touch, **vals)
예제 #25
0
    def index_grib_file(self, target):
        """Register ``target`` in the file table and index every message in it.

        The path is inserted into the file table; a path that is already
        registered is left as is (``ON CONFLICT (path) DO NOTHING``) so the
        same file can be indexed again without violating the UNIQUE
        constraint on ``path``.  The file's id is then looked up and, for
        every message yielded by ``GribFile(target)``, a fingerprint row is
        inserted or updated (keyed by ``valid_date``) with the file id, the
        message offset, the fingerprint values and the min/max of the
        message's array.  All statements run inside one ``cdsdb.begin()``
        block.  The number of messages processed is printed at the end.

        :param target: path of the GRIB file to index.
        """
        insert_files = text("""
        INSERT INTO {table} (path) VALUES (:path)
        ON CONFLICT (path) DO NOTHING
        """.format(table=self.file_table))

        select_file_id = text("""
        SELECT id FROM {table} WHERE path=:path
        """.format(table=self.file_table))

        query_7 = text("""
        INSERT INTO {table} (file_id,
                             position,
                             fingerprint_r,
                             fingerprint_s,
                             field_max,
                             field_min,
                             valid_date)
        VALUES(:file_id, :position, :fingerprint_r, :fingerprint_s, :field_max, :field_min, :valid_date)
        ON CONFLICT(valid_date) DO UPDATE
        SET file_id         = :file_id,
              position      = :position,
              fingerprint_r = :fingerprint_r,
              fingerprint_s = :fingerprint_s,
              field_max     = :field_max,
              field_min     = :field_min
        """.format(table=self.fingerprint_table))

        n = 0
        with cdsdb.begin() as connection:
            connection.execute(insert_files, path=target)
            # Looked up separately so the id is found whether the row was
            # just inserted or already existed.
            fileid = connection.execute(select_file_id, path=target).scalar()
            assert fileid is not None

            for g in GribFile(target):

                d = dict(file_id=fileid,
                         valid_date=g.valid_date,
                         position=int(g.offset))

                finger = FingerPrint(g.array, depth=3)

                # Presumably adds the fingerprint_r / fingerprint_s entries
                # that query_7 binds -- confirm against FingerPrint.to_db.
                finger.to_db(d)
                d['field_max'] = np.amax(g.array)
                d['field_min'] = np.amin(g.array)

                connection.execute(query_7, **d)
                n += 1

        print(self, 'added', n, 'field(s)')