Example #1
def get_scoring(con):
    scoring = psql.read_sql(""" 
    select * 
    from backend.scoring_result sr
    where sr.dwh_country_id=1 and sr.created_at>'2015-07-07' """, con)
    return scoring


def get_loan_requests(con):
    loan_requests = psql.read_sql("""
    select 
        lr.id_loan_request, 
        lr.loan_request_nr, 
        sr.fk_user,
        sr.id,
        lr.principal_amount,
        ua.net_income,
        uh.postcheck_data
    from base.loan_request lr
    join base.user_account ua on 
        lr.dwh_country_id=ua.dwh_country_id and
        lr.fk_user=ua.id_user
    join backend.loan_request_user_history uh on 
        lr.dwh_country_id=uh.dwh_country_id and
        lr.id_loan_request=uh.fk_loan_request
    join backend.scoring_result sr on
        lr.dwh_country_id=sr.dwh_country_id and
        lr.fk_user=sr.fk_user
    
    where sr.dwh_country_id=1 and sr.created_at>'2015-07-07'
    and sr.is_latest=1
    """,con)
    return loan_requests
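
A hypothetical driver for the two helpers above, assuming a psycopg2 connection to the warehouse (database name, host, and user are placeholders):

import psycopg2
import pandas.io.sql as psql

# Placeholder connection settings; substitute your own warehouse details.
con = psycopg2.connect(database='dwh', host='localhost', user='analyst')
scoring = get_scoring(con)
loan_requests = get_loan_requests(con)
print(len(scoring), len(loan_requests))
con.close()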
Example #2
    def test_to_sql_index_label(self):
        temp_frame = DataFrame({'col1': range(4)})

        # no index name, defaults to 'index'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'index')

        # specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")

        # using the index name
        temp_frame.index.name = 'index_name'
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'index_name',
                         "Index name not written to database")

        # has index name, but specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")
Example #3
    def Read_data_by_selection_index(self, con="*", tb_name=None, start_index=None, end_index=None, index_name="Default"):
        """
        Access the database, restricting the result to a particular range of an
        index column (e.g. a datetime column).
        :param con: comma-separated string of the columns to retrieve, e.g. "attr1, attr2"
        :param tb_name: name of the table to query
        :param start_index: range start; a str such as "2014-02-01" for a datetime index, or an int index
        :param end_index: range end; a str such as "2014-02-01" for a datetime index, or an int index
        :return: a DataFrame with the selected rows, or -1 on error
        """
        assert isinstance(index_name, str), "please input a valid index name"
        dict_rec = pd.DataFrame({})
        if not con:
            print("nothing to read from database")
            return -1
        else:
            # make sure the database is set and the table exists
            if self.database_name is not None and self.if_table_exist(tb_name):
                # access the data and retrieve only the rows within the index range
                if isinstance(start_index, str) and isinstance(end_index, str):
                    dict_rec = sql.read_sql("""select %s from %s where %s between '%s' and '%s' order by %s ASC"""
                                            % (con, tb_name, index_name, start_index, end_index, index_name),
                                            con=self.db)
                else:
                    dict_rec = sql.read_sql("""select %s from %s where %s between %s and %s order by %s ASC"""
                                            % (con, tb_name, index_name, start_index, end_index, index_name),
                                            con=self.db)
                return dict_rec
            else:
                print("database should be set up first and table name %s should exist..." % (tb_name,))
                return -1
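
The method above interpolates the range bounds straight into the SQL string, which breaks on values containing quotes and invites SQL injection. Below is a minimal sketch of the same range query using read_sql's params argument instead; it assumes a driver with the %s paramstyle (e.g. MySQLdb or psycopg2), and read_range is a hypothetical helper, not part of the class above:

import pandas.io.sql as sql

def read_range(db, tb_name, columns="*", index_name="Default",
               start_index=None, end_index=None):
    # Identifiers (columns, table, index name) cannot be bound as query
    # parameters, so they are still interpolated; only the range bounds go
    # through `params`, letting the driver handle quoting and escaping.
    query = ("SELECT %s FROM %s WHERE %s BETWEEN %%s AND %%s ORDER BY %s ASC"
             % (columns, tb_name, index_name, index_name))
    return sql.read_sql(query, con=db, params=(start_index, end_index))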
Example #4
    def load_DB_table(self, table_name):
        """
        Loads a table table_name from the SQL database associated with the class
        instance on initialization.
        If pickle format of the table exists and is newer than the database file,
        it will load the table from pickle file instead.

        Also checks whether the table is already loaded.
        """
        if table_name not in self.loaded:
            file_pickle = self.DB_path + self.name + '/' + table_name + '.pkl'
            file_sql = self.DB_path + self.name + '/' + self.name + '.sqlite'
            if (not os.path.isfile(file_pickle)) or (os.path.getmtime(file_sql) > os.path.getmtime(file_pickle)):
                print('Loading table ' + table_name + ' from database ' + self.name + '...')
                if table_name in ('vertices', 'bonds', 'frames'):
                    self.DB_table[table_name] = psql.read_sql('SELECT * FROM ' + table_name + ';', self.con)
                else:
                    self.DB_table[table_name] = psql.read_sql('SELECT * FROM ' + table_name + ' WHERE cell_id > 10000;', self.con)

                print('Writing table ' + table_name + ' to pickle file ' + self.name)
                self.DB_table[table_name].to_pickle(file_pickle)
            else:
                print('Loading table ' + table_name + ' from pickle file...')
                self.DB_table[table_name] = pd.read_pickle(file_pickle)
            if table_name == 'cells':
                self.DB_table[table_name] = self.DB_table[table_name][['frame', 'cell_id', 'center_x', 'center_y', 'area', 'elong_xx', 'elong_xy']]
            self.loaded.add(table_name)
Example #5
def roadtmpc(grids, valid, iarchive):
    """ Do the RWIS road temperature grid"""
    if iarchive:
        nt = NetworkTable(['IA_RWIS', 'MN_RWIS', 'WI_RWIS', 'IL_RWIS',
                           'MO_RWIS', 'KS_RWIS', 'NE_RWIS', 'SD_RWIS'])
        pgconn = psycopg2.connect(database='rwis', host='iemdb', user='******')
        df = read_sql("""
            SELECT station, tfs0 as tsf0
            from alldata WHERE valid >= %s and valid < %s and
            tfs0 >= -50 and tfs0 < 150
            """, pgconn,  params=((valid - datetime.timedelta(minutes=30)),
                                  (valid + datetime.timedelta(minutes=30))),
                      index_col=None)
        df['lat'] = df['station'].apply(lambda x: nt.sts.get(x, {}).get('lat',
                                                                        0))
        df['lon'] = df['station'].apply(lambda x: nt.sts.get(x, {}).get('lon',
                                                                        0))
    else:
        pgconn = psycopg2.connect(database='iem', host='iemdb', user='******')
        df = read_sql("""
            SELECT ST_x(geom) as lon, ST_y(geom) as lat,
            tsf0
            from current c JOIN stations t on (c.iemid = t.iemid)
            WHERE c.valid > now() - '2 hours'::interval and
            t.network in ('IA_RWIS', 'MN_RWIS', 'WI_RWIS', 'IL_RWIS',
            'MO_RWIS', 'KS_RWIS', 'NE_RWIS', 'SD_RWIS') and tsf0 >= -50
            and tsf0 < 150
            """, pgconn, index_col=None)

    nn = NearestNDInterpolator((df['lon'].values, df['lat'].values),
                               temperature(df['tsf0'].values, 'F').value('C'))
    grids['roadtmpc'] = nn(XI, YI)
Example #6
def GetSensorList(site=''):
    if site == '':
        try:
            db, cur = SenslopeDBConnect(Namedb)
            cur.execute("use " + Namedb)

            query = 'SELECT name, num_nodes, seg_length, col_length FROM site_column_props'

            df = psql.read_sql(query, db)

            # make a sensor list of columnArray class instances
            sensors = []
            for s in range(len(df)):
                if df.name[s] == 'mcatb' or df.name[s] == 'messb':
                    continue
                sensor = columnArray(df.name[s], df.num_nodes[s], df.seg_length[s], df.col_length[s])
                sensors.append(sensor)
            db.close()
            return sensors
        except Exception:
            raise ValueError('Could not get sensor list from database')
    else:
        db, cur = SenslopeDBConnect(Namedb)
        cur.execute("use " + Namedb)

        query = ("SELECT name, num_nodes, seg_length, col_length "
                 "FROM site_column_props WHERE name LIKE '%" + str(site) + "%'")
        df = psql.read_sql(query, db)
        sensors = []
        sensor = columnArray(df.name[0], df.num_nodes[0], df.seg_length[0], df.col_length[0])
        sensors.append(sensor)
        db.close()
        return sensors
Example #7
    def GetJoinedDf(self):
        run_id = 12332  # dstore.Get("current_run")

        # DB df columns:
        # [u'state', u'id', u'run_id', u'user_id', u'cell', u'time_raw', u'time1', u'lap1', u'time2', u'lap2', u'time3', u'lap3', u'un1', u'un2', u'un3', u'us1']
        self.joinedDf = psql.read_sql(
            "SELECT * FROM times WHERE (times.run_id = " + str(run_id) + ")",
            self.db)

        # set index = id
        self.joinedDf.set_index('id', drop=False, inplace=True)

        # replace nan with None
        self.joinedDf = self.joinedDf.where(pd.notnull(self.joinedDf), None)

        if self.dstore.GetItem("racesettings-app", ['rfid']) == 2:
            tDf = psql.read_sql("SELECT * FROM tags", self.db, index_col="id")
            tDf = tDf[["user_nr", "tag_id"]]
            self.joinedDf = pd.merge(self.joinedDf, tDf, left_on='user_id', right_on='tag_id', how="left")
            self.joinedDf = pd.merge(self.joinedDf, self.ucDf, left_on='user_nr', right_on='nr', how="left")
            self.joinedDf.set_index('id', drop=False, inplace=True)
        else:
            self.joinedDf = pd.merge(self.joinedDf, self.ucDf, left_on='user_id', right_index=True, how="left")

        self.joinedDf.sort_values("time_raw", inplace=True)

        # replace nan with None
        self.joinedDf = self.joinedDf.where(pd.notnull(self.joinedDf), None)

        return self.joinedDf
Example #8
    def fetch_query(self, query, index=None):
        """
        ------------------------------------------------------------------------------------------
         fetch records from the mysql database based on a custom query
         @param  query - the query used to fetch selected records from the mysql DB
         @param  index - the column to be used as the df index
         @ret    df    - pandas df with the data retrieved from the mysql DB; [] if no data found.
        ------------------------------------------------------------------------------------------
        """

        con = self.connect_db()
        if con is False:
            exit("\nTerminating Class module %s" % (__version__,))

        if index:
            df = psql.read_sql(query, con=con, index_col=index)
        else:
            df = psql.read_sql(query, con=con)

        try:
            con.close()
        except Exception:
            print("couldn't close mysql DB")

        return df.sort_index(axis=0)   # return DF sorted by ascending index
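
A hypothetical usage sketch for fetch_query, assuming the surrounding wrapper class (called MysqlWrapper here) has been initialized with valid connection settings:

# Fetch selected rows and use the `id` column as the DataFrame index.
db = MysqlWrapper()   # hypothetical instantiation of the class above
df = db.fetch_query("SELECT id, name, value FROM readings", index="id")
print(df.head())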
Example #9
def main(argv):
    """Go Main Go"""
    mlra_id = int(argv[1])
    pgconn = get_dbconn('idep')

    mlraxref = read_sql("""
    select distinct mlra_id, mlra_name from mlra
    """, pgconn, index_col='mlra_id')

    print("%s," % (mlraxref.at[mlra_id, 'mlra_name'], ), end='')
    for col, scenario in enumerate(range(36, 39)):
        df = read_sql("""
        with myhucs as (
            SELECT huc_12 from huc12 where scenario = 0 and mlra_id = %s
        )
        select fpath, f.huc_12 from flowpaths f, myhucs h
        WHERE f.scenario = 0 and f.huc_12 = h.huc_12
        """, pgconn, params=(mlra_id, ), index_col=None)
        if df.empty:
            print()
            continue
        hits = 0
        for _, row in df.iterrows():
            prj = (
                "/prj/%s/%s/%s_%s.prj"
             ) % (row['huc_12'][:8], row['huc_12'][8:], row['huc_12'],
                  row['fpath'])
            prj2 = "/i/%s/%s" % (scenario, prj)
            if open(prj2).read().find("SWITCHGRASS.rot") > 0:
                hits += 1
        print("%.2f," % (hits / float(len(df.index)) * 100., ), end="")
    print()
Example #10
def main():
    """Go Main Go."""
    pgconn = get_dbconn('postgis')
    base = read_sql("""
    select segid, archive_begin, archive_end from roads_base
    """, pgconn, index_col='segid')
    for year in range(2003, 2018):
        table = "roads_%s_%s_log" % (year, year + 1)
        df = read_sql("""
            SELECT segid, min(valid), max(valid) from """ + table + """
            WHERE valid is not null GROUP by segid
        """, pgconn, index_col='segid')
        for segid, row in df.iterrows():
            curmin = base.at[segid, 'archive_begin']
            curmax = base.at[segid, 'archive_end']
            if curmin is None or row['min'] < curmin:
                base.at[segid, 'archive_begin'] = row['min']
            if curmax is None or row['max'] > curmax:
                base.at[segid, 'archive_end'] = row['max']

    cursor = pgconn.cursor()
    for segid, row in base.iterrows():
        if pd.isnull(row['archive_begin']) or pd.isnull(row['archive_end']):
            continue
        print("%s %s -> %s" % (
            segid, row['archive_begin'], row['archive_end']))
        cursor.execute("""
        update roads_base SET archive_begin = %s, archive_end = %s
        where segid = %s
        """, (row['archive_begin'], row['archive_end'], segid))
    cursor.close()
    pgconn.commit()
Example #11
def main():
    """Go!"""
    nt = NetworkTable("IA_ASOS")
    pgconn = get_dbconn('iem')
    df = read_sql("""
    SELECT id, valid, tmpf::int as tmpf, dwpf::int as dwpf,
    sknt from current_log c JOIN stations t
    on (c.iemid = t.iemid) WHERE t.network = 'IA_ASOS' and
    c.valid > 'TODAY' and c.tmpf > 70 ORDER by id ASC
    """, pgconn, index_col=None)

    pgconn = get_dbconn('asos')
    for _, row in df.iterrows():
        df2 = read_sql("""
            SELECT valid, tmpf, dwpf, sknt from alldata WHERE station = %s
            and valid < '2018-10-03' and tmpf::int >= %s and dwpf::int >= %s
            and sknt >= %s ORDER by valid DESC
        """, pgconn, params=(row['id'], row['tmpf'], row['dwpf'], row['sknt']),
                       index_col=None)
        if len(df2.index) > 5:
            continue
        lastdate = None
        if not df2.empty:
            lastdate = df2.iloc[0]['valid'].date()
        print(
            ("%s,%s,%s,%s,%.0f,%s,%s,%s"
             ) % (row['id'], row['valid'], row['tmpf'], row['dwpf'],
                  row['sknt'], len(df2.index), lastdate,
                  nt.sts[row['id']]['archive_begin'].year)
        )
Example #12
    def test_date_parsing(self):
        """ Test date parsing in read_sql """
        # No Parsing
        df = sql.read_sql(
            "SELECT * FROM types_test_data", self.conn, flavor='sqlite')
        self.assertFalse(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql("SELECT * FROM types_test_data",
                          self.conn, flavor='sqlite', parse_dates=['DateCol'])
        self.assertTrue(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql("SELECT * FROM types_test_data", self.conn,
                          flavor='sqlite',
                          parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        self.assertTrue(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql("SELECT * FROM types_test_data",
                          self.conn, flavor='sqlite',
                          parse_dates=['IntDateCol'])

        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql("SELECT * FROM types_test_data",
                          self.conn, flavor='sqlite',
                          parse_dates={'IntDateCol': 's'})

        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")
Example #13
def main(argv):
    """Go Main Go"""
    mlra_id = int(argv[1])
    pgconn = get_dbconn('idep')

    fig, ax = plt.subplots(3, 3, figsize=(12, 6))

    mlraxref = read_sql("""
    select distinct mlra_id, mlra_name from mlra
    """, pgconn, index_col='mlra_id')
    fig.text(0.5, 0.96,
             ("CSCAP Yearly Scenario Changes for MLRA: %s [%s]"
              ) % (mlraxref.at[mlra_id, 'mlra_name'], mlra_id),
             ha='center', fontsize=16)

    for col, scenario in enumerate(range(36, 39)):
        df = read_sql("""
        with myhucs as (
            SELECT huc_12 from huc12 where scenario = 0 and mlra_id = %s
        )
        select r.huc_12, scenario, extract(year from valid)::int as year,
        sum(avg_loss) * 10. as loss, sum(avg_runoff) as runoff,
        sum(avg_delivery) * 10. as delivery
        from results_by_huc12 r JOIN myhucs h on (r.huc_12 = h.huc_12)
        where r.valid >= '2008-01-01' and r.valid < '2017-01-01'
        and (scenario = 0 or scenario = %s)
        GROUP by r.huc_12, year, scenario
        """, pgconn, params=(mlra_id, scenario), index_col=None)
        gdf = df.groupby(['scenario', 'year']).mean()
        delta = gdf.loc[(scenario, )] - gdf.loc[(0, )]
        for row, varname in enumerate(['runoff', 'loss', 'delivery']):
            ax[row, col].bar(delta.index.values, delta[varname],
                             color=['b'
                                    if x > 0 else 'r'
                                    for x in delta[varname].values])
            if row == 0:
                ax[row, col].set_title("Scenario %s\n%s" % (scenario,
                                                            LABELS[scenario]))
    ylabels = ['Runoff [mm]', 'Detachment\n[tonnes/ha]',
               'Delivery [tonnes/ha]']
    for row in range(3):
        ymin = 99
        ymax = -99
        for col in range(3):
            ylim = ax[row, col].get_ylim()
            ymin = min([ylim[0], ymin])
            ymax = max([ylim[1], ymax])
        for col in range(3):
            ax[row, col].set_ylim(ymin, ymax)
            ax[row, col].grid(True)
            if col > 0:
                ax[row, col].set_yticklabels([])
            else:
                ax[row, col].set_ylabel(ylabels[row])

    fig.savefig('mlra%s.png' % (mlra_id, ))
    fig.savefig('mlra%s.pdf' % (mlra_id, ))
    plt.close()
Example #14
File: p160.py Project: akrherz/iem
def get_context(fdict):
    pgconn = psycopg2.connect(database='hads', host='iemdb-hads',
                              user='******')
    cursor = pgconn.cursor()
    ctx = get_autoplot_context(fdict, get_description())

    ctx['station'] = ctx['station'].upper()
    station = ctx['station']
    dt = ctx['dt']

    # Attempt to get station information
    cursor.execute("""
    SELECT name from stations where id = %s and network ~* 'DCP'
    """, (station,))
    ctx['name'] = ""
    if cursor.rowcount > 0:
        row = cursor.fetchone()
        ctx['name'] = row[0]

    ctx['df'] = read_sql("""with fx as (
        select id, issued, primaryname, primaryunits, secondaryname,
        secondaryunits from hml_forecast where station = %s
        and generationtime between %s and %s)
    SELECT f.id, f.issued, d.valid, d.primary_value, f.primaryname,
    f.primaryunits, d.secondary_value, f.secondaryname,
    f.secondaryunits from
    hml_forecast_data_""" + str(dt.year) + """ d JOIN fx f
    on (d.hml_forecast_id = f.id) ORDER by f.id ASC, d.valid ASC
    """, pgconn, params=(station, dt - datetime.timedelta(days=3),
                         dt + datetime.timedelta(days=1)), index_col=None)
    if len(ctx['df'].index) > 0:
        ctx['primary'] = "%s[%s]" % (ctx['df'].iloc[0]['primaryname'],
                                     ctx['df'].iloc[0]['primaryunits'])
        ctx['secondary'] = "%s[%s]" % (ctx['df'].iloc[0]['secondaryname'],
                                       ctx['df'].iloc[0]['secondaryunits'])

        # get obs
        mints = ctx['df']['valid'].min()
        maxts = ctx['df']['valid'].max()
    else:
        mints = dt - datetime.timedelta(days=3)
        maxts = dt + datetime.timedelta(days=3)
    df = read_sql("""
    SELECT valid, h.label, value
    from hml_observed_data_""" + str(dt.year) + """ d JOIN hml_observed_keys h
    on (d.key = h.id)
    WHERE station = %s and valid between %s and %s ORDER by valid
    """, pgconn, params=(station, mints, maxts), index_col=None)
    ctx['odf'] = df.pivot(index='valid', columns='label', values='value')
    if len(ctx['df'].index) > 0:
        ctx['df'] = pd.merge(ctx['df'], ctx['odf'], left_on='valid',
                             right_index=True, how='left', sort=False)
    ctx['title'] = "[%s] %s" % (ctx['station'], ctx['name'])
    ctx['subtitle'] = ctx['dt'].strftime("%d %b %Y %H:%M UTC")
    if len(ctx['df'].index) == 0 and len(ctx['odf'].index) > 0:
        ctx['primary'] = ctx['odf'].columns[0]
        ctx['secondary'] = ctx['odf'].columns[1]
    return ctx
Example #15
def madlibLS(sConn, schema=None):
    if schema is None:
        return psql.read_sql("select * from information_schema.schemata;", sConn)
    else:
        stmt = """SELECT * FROM information_schema.tables 
                  WHERE table_schema = '{schema}';""".format(
            schema=schema
        )
        return psql.read_sql(stmt, sConn)
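
A hypothetical usage sketch, assuming sConn is an open connection to a PostgreSQL database:

schemata = madlibLS(sConn)                  # all schemata in the database
tables = madlibLS(sConn, schema='madlib')   # all tables in one schema
print(tables['table_name'].head())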
Example #16
File: p148.py Project: akrherz/iem
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx['station']
    network = ctx['network']
    varname = ctx['var']
    thedate = ctx['thedate']
    date = ctx['date']

    nt = NetworkTable(network)
    pgconn = psycopg2.connect(database='coop', host='iemdb', user='******')

    table = "alldata_%s" % (station[:2], )
    if date == 'exact':
        df = read_sql("""
        SELECT year, high, day, precip from """ + table + """
        WHERE station = %s
        and sday = %s ORDER by year ASC
        """, pgconn, params=(station, thedate.strftime("%m%d")),
                      index_col='year')
        subtitle = thedate.strftime("%B %-d")
    else:
        if date == 'memorial':
            days = memorial_days()
        elif date == 'thanksgiving':
            days = thanksgiving()
        else:
            days = labor_days()

        df = read_sql("""
        SELECT year, high, day, precip from """ + table + """
        WHERE station = %s
        and day in %s ORDER by year ASC
        """, pgconn, params=(station, tuple(days)),
                      index_col='year')
        subtitle = PDICT[date]

    (fig, ax) = plt.subplots(1, 1)

    ax.bar(df.index.values, df[varname], fc='r', ec='r', align='center')
    mean = df[varname].mean()
    ax.axhline(mean)
    ax.text(df.index.values[-1] + 1, mean, '%.2f' % (mean,), ha='left',
            va='center')
    ax.grid(True)
    ax.set_title(("%s [%s] Daily %s\non %s"
                  ) % (nt.sts[station]['name'], station, PDICT2[varname],
                       subtitle))
    ax.set_xlim(df.index.values.min() - 1,
                df.index.values.max() + 1)
    ax.set_ylabel(PDICT2[varname])
    if varname != 'precip':
        ax.set_ylim(df[varname].min() - 5, df[varname].max() + 5)
    return fig, df
Example #17
    def GetDataframe(self):
        uDf = psql.read_sql("SELECT * FROM " + str(self.name), db.getDb())

        # CATEGORY df
        cDf = psql.read_sql("SELECT id, name FROM categories", db.getDb())
        cDf.columns = ['cdf_id', 'category']
        uDf = pd.merge(uDf, cDf, left_on='category_id', right_on='cdf_id', how="left")
        uDf = uDf[["id", "nr", "status", "name", "first_name", "category", "club", "year", "sex", "email", "o1", "o2", "o3", "o4"]]

        return uDf
Example #18
    def fetch_pandas(self, select_statement, tb_name, query=''):
        """
        Same data format as PSQLPython.fetch(select_statement, tb_name, query=''), but returns a pandas dataframe
        """
        if query == '':
            pandas_output = pdsql.read_sql(
                "SELECT %(select_statement)s FROM %(tb_name)s", self.con,
                params={"select_statement": AsIs(select_statement), "tb_name": AsIs(tb_name)})
        else:
            pandas_output = pdsql.read_sql(
                "SELECT %(select_statement)s FROM %(tb_name)s WHERE %(query)s", self.con,
                params={"select_statement": AsIs(select_statement), "tb_name": AsIs(tb_name),
                        "query": AsIs(query)})
        return pandas_output
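
Identifiers such as column and table names cannot be bound as ordinary query parameters, which is why the method above wraps them in psycopg2's AsIs so they are spliced into the statement verbatim; the caller must therefore pass trusted values only. A hypothetical call, assuming the class instance is named db:

from psycopg2.extensions import AsIs  # import required by fetch_pandas above

# Builds and runs: SELECT id, name FROM users WHERE id > 10
df = db.fetch_pandas("id, name", "users", query="id > 10")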
Example #19
    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query(
            "SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql(
            "SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2,
                              "read_sql and read_sql_query do not return the"
                              " same result with a query")

        iris_frame1 = sql.read_sql_table('iris', self.conn)
        iris_frame2 = sql.read_sql('iris', self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)
Example #20
def read_in_data():
    ''' reads in the cleaned tables from MySQL'''
    con = mdb.connect('localhost', 'root', '***', 'HomewardBound')

    with con:
        dogs = sql.read_sql('select * from Pets_cleaned;', con=con)
        weather = sql.read_sql('select * from PetWeather;', con=con)
        econ = sql.read_sql('select * from PetEcon;', con=con)

    return dogs, weather, econ
Example #21
    def Find_Next_Fraudster(table, possible_fraudsters_index):
        global max_fraudsters_in_DB
        engine = create_engine('postgresql://*****:*****@localhost:5432/fraud_detector')
        cnx = engine.raw_connection()
        possible_fraudsters_df = sql.read_sql("SELECT * FROM " + table, cnx)
        print("max_fraudsters_in_DB before", max_fraudsters_in_DB)

        max_fraudsters_in_DB = sql.read_sql("SELECT count(*) FROM " + table, cnx)['count'][0]
        print("max_fraudsters_in_DB after", max_fraudsters_in_DB)
        cnx.close()
        print("fraudster index is", possible_fraudsters_index)
        fraudster_id = possible_fraudsters_df.iloc[possible_fraudsters_index, 1].encode('utf-8')
        return fraudster_id
Example #22
    def GetUserCategoryDf(self):
        # USER df
        uDf = psql.read_sql("SELECT * FROM users", self.db, index_col="id")
        uDf["name"] = uDf['name'].str.upper() + ' ' + uDf['first_name']

        # CATEGORY df
        cDf = psql.read_sql("SELECT id, name, start_nr FROM categories", self.db)
        cDf.columns = ['id', 'category', "start_nr"]
        uDf = pd.merge(uDf, cDf, left_on='category_id', right_on='id', how="left")

        uDf = uDf[["nr", "status", "name", "category", "start_nr", "year", "club", "sex", "o1", "o2", "o3", "o4"]]
        return uDf
Example #23
    def load_data(self, stock):
        '''
        Load the historical data for a stock.
        '''
        search_sql = "select * from {0}".format('day_' + stock)
        try:
            raw_data = SQL.read_sql(search_sql, self.engine)
            return raw_data
        except Exception:
            # table missing: fetch via tushare, persist it, then re-read
            data = ts.get_h_data(stock)
            data = data.sort_index(ascending=True)
            data.to_sql('day_' + stock, self.engine, if_exists='append')
            raw_data = SQL.read_sql(search_sql, self.engine)
            return raw_data
Example #24
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******')

    station = fdict.get('station', 'DMX')[:4]

    nt = NetworkTable('WFO')
    nt.sts['_ALL'] = {'name': 'All Offices'}

    (fig, ax) = plt.subplots(1, 1, sharex=True)

    if station == '_ALL':
        df = read_sql("""
            with obs as (
                SELECT distinct extract(year from issue) as yr,
                phenomena, significance from warnings WHERE
                phenomena is not null and significance is not null and
                issue > '2005-01-01' and issue is not null
            )
            SELECT yr as year, count(*) from obs GROUP by yr ORDER by yr ASC
            """, pgconn, index_col=None)
    else:
        df = read_sql("""
            with obs as (
                SELECT distinct extract(year from issue) as yr,
                phenomena, significance from warnings WHERE
                wfo = %s and phenomena is not null and significance is not null
                and issue > '2005-01-01' and issue is not null
            )
            SELECT yr as year, count(*) from obs GROUP by yr ORDER by yr ASC
            """, pgconn, params=(station, ), index_col=None)

    df['wfo'] = station
    df['year'] = df['year'].astype('i')

    ax.bar(df['year']-0.4, df['count'], width=0.8, fc='b', ec='b')
    for yr, val in zip(df['year'], df['count']):
        ax.text(yr, val+1, "%s" % (val,), ha='center')
    ax.set_title(("[%s] NWS %s\nCount of Distinct VTEC Phenomena/"
                  "Significance - %i to %i"
                  ) % (station, nt.sts[station]['name'],
                       df['year'].min(), df['year'].max()))
    ax.grid()
    ax.set_ylabel("Count")

    return fig, df
Example #25
def create_email_content_database():
    """
    Query the SQLite database and read the email content table, returning it as
    a pandas dataframe. Note that since this table is very large, querying is
    slow, so we only read it once and save the data in a csv for repeated use.
    """
    saved_file = data_dir + email_content_saved_fname
    if path.isfile(saved_file):
        print('Email content was previously read, loading from save...')
        email_content = pd.read_csv(saved_file)
    else:
        print('Reading email content for the first time. This may take a while...')
        conn = sqlite3.connect(data_dir + database_name)
        email_content_query = "SELECT * FROM email_content;"
        email_content = sql.read_sql(email_content_query, con=conn)
        # Convert the datetime string to datetime objects
        email_content = email_content.rename(columns={'send_time': 'datetime'})
        email_content.to_csv(saved_file)
        conn.close()
    # Fix the data types
    email_content['article_id'] = email_content['article_id'].astype('int')
    email_content['user_id'] = email_content['user_id'].astype('int')
    email_content['email_id'] = email_content['email_id'].astype('int')
    email_content['datetime'] = pd.to_datetime(email_content['datetime'])
    print('Email database loaded.')
    return email_content
Example #26
File: smts.py Project: akrherz/iem
def make_vsm_histogram_plot(station, sts, ets):
    """Option 6"""
    df = read_sql("""
        SELECT
        CASE WHEN t12_c_avg_qc > 1 then vwc_12_avg_qc else null end as v12,
        CASE WHEN t24_c_avg_qc > 1 then vwc_24_avg_qc else null end as v24,
        CASE WHEN t50_c_avg_qc > 1 then vwc_50_avg_qc else null end as v50
        from sm_hourly
        where station = %s and valid >= %s and valid < %s
    """, ISUAG, params=(station, sts, ets), index_col=None)

    (_, ax) = plt.subplots(3, 1, sharex=True)
    ax[0].set_title(("ISUSM Station: %s VWC Histogram\n"
                     "For un-frozen condition between %s and %s"
                     ) % (nt.sts[station]['name'],
                          sts.strftime("%-d %b %Y"),
                          ets.strftime("%-d %b %Y")))
    for i, col in enumerate(['v12', 'v24', 'v50']):
        ax[i].hist(df[col] * 100., bins=50, range=(0, 50), normed=True)
        ax[i].set_ylabel("Frequency")
        ax[i].grid(True)
        ax[i].text(0.99, 0.99, "%s inches" % (col[1:],),
                   transform=ax[i].transAxes, ha='right', va='top')
        ax[i].set_yscale("log")

    ax[2].set_xlabel("Volumetric Water Content [%]")
    sys.stdout.write("Content-Type: image/png\n\n")
    plt.savefig(sys.stdout, format='png')
Example #27
def getQuestionData(file_name):
    from pandas.io.sql import read_sql

    conn = engine.connect()
    table_exists = engine.dialect.has_table(conn, the_table, schema=the_schema)
    conn.close()

    # It may be better to explicitly create the table elsewhere.
    # Checking like this might be slower.
    if table_exists:
        sql = "DELETE FROM %s.%s WHERE file_name='%s'" % \
            (the_schema, the_table, file_name)
        engine.execute(sql)

    sql = """
        SELECT file_name, last_update, context, speaker_number, speaker_text
        FROM streetevents.speaker_data
        WHERE file_name='%s'
        """ % (file_name,)

    df = read_sql(sql, engine)

    return df
Example #28
    def load_data(self, stock):
        '''
        Load the historical data for a stock.
        '''
        search_sql = "select * from {0}".format('day_' + stock)
        raw_data = SQL.read_sql(search_sql, self.engine)
        return raw_data
Example #29
def main():
    """Go Main Go"""
    pgconn = get_dbconn('postgis')
    sql = get_polygon()
    df = read_sql(sql, pgconn, index_col='year')
    print(df)

    (fig, ax) = plt.subplots(1, 1)

    ax.bar(df.index.values - 0.2, df['count'].values, width=0.4, fc='r')
    ax.set_ylabel("Warning Count", color='r')

    y2 = ax.twinx()
    y2.bar(df.index.values + 0.2, df['area'].values, width=0.4, fc='b')
    y2.set_ylabel("Size (Continental United States)", color='b')

    p1 = plt.Rectangle((0, 0), 1, 1, fc="r")
    p3 = plt.Rectangle((0, 0), 1, 1, fc="b")
    ax.legend([p1, p3], ["Counts", "Size"], loc=2)
    ax.grid(True)

    ax.set_title("NWS *Storm Based* Tornado + Severe Thunderstorm Warnings")
    ax.set_ylim(0, 90000)
    y2.set_ylim(0, 25)
    ax.set_xlim(1985.5, 2017.5)

    fig.text(0.01, 0.01, 'Generated %s' % (datetime.date.today(), ))
    fig.savefig('test.png')
Example #30
def do_site(site):
    """Print out a simple listing of trouble"""
    df = read_sql("""
    with ag as (
        select year, varname, value, count(*) from agronomic_data
        where uniqueid = %s and (value is null or value in ('', '.'))
        GROUP by year, varname, value),
    soil as (
        select year, varname, value, count(*) from soil_data
        where uniqueid = %s and (value is null or value in ('', '.'))
        GROUP by year, varname, value)

    SELECT * from ag UNION select * from soil ORDER by year ASC, varname ASC
    """, DBCONN, params=(site, site), index_col=None)
    ssw("Content-type: text/plain\n\n")
    ssw("CSCAP Variable Progress Report\n")
    ssw("Site: %s\n" % (site,))
    ssw("Generated: %s\n" % (
        datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), ))
    ssw("Total Missing: %s\n" % (df['count'].sum(),))
    ssw("%4s %-10s %-10s %-6s\n" % ('YEAR', 'VARNAME', 'VALUE', 'COUNT'))

    def nice(val):
        if val is None:
            return 'Empty'
        if val == '':
            return 'Empty'
        if val == '.':
            return "Period"
        return val

    for _, row in df.iterrows():
        ssw("%s %-10s %-10s %-6s\n" % (row['year'], row['varname'],
                                       nice(row['value']), row['count']))
Example #31
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn("postgis")
    ctx = get_autoplot_context(fdict, get_description())
    sts = ctx["sdate"]
    ets = ctx["edate"]
    if (ets - sts).days > 366:
        raise NoDataFound("Chart duration needs to be less than 1 year.")
    wfo = ctx["wfo"]
    p1 = ctx["phenomenav1"]
    p2 = ctx["phenomenav2"]
    p3 = ctx["phenomenav3"]
    p4 = ctx["phenomenav4"]
    phenomena = []
    for p in [p1, p2, p3, p4]:
        if p is not None:
            phenomena.append(p[:2])
    s1 = ctx["significancev1"]
    s2 = ctx["significancev2"]
    s3 = ctx["significancev3"]
    s4 = ctx["significancev4"]
    significance = []
    for s in [s1, s2, s3, s4]:
        if s is not None:
            significance.append(s[0])

    pstr = []
    title = ""
    for i, (p, s) in enumerate(zip(phenomena, significance)):
        pstr.append("(phenomena = '%s' and significance = '%s')" % (p, s))
        if i == 2:
            title += "\n"
        title += "%s %s.%s, " % (vtec.get_ps_string(p, s), p, s)
    pstr = " or ".join(pstr)
    pstr = "(%s)" % (pstr, )

    if ctx["w"] == "wfo":
        ctx["_nt"].sts["_ALL"] = {
            "name": "All Offices",
            "tzname": "America/Chicago",
        }
        if wfo not in ctx["_nt"].sts:
            raise NoDataFound("No Data Found.")
        wfo_limiter = (" and wfo = '%s' ") % (wfo
                                              if len(wfo) == 3 else wfo[1:], )
        if wfo == "_ALL":
            wfo_limiter = ""
        tzname = ctx["_nt"].sts[wfo]["tzname"]
    else:
        wfo_limiter = " and substr(ugc, 1, 2) = '%s' " % (ctx["state"], )
        tzname = "America/Chicago"

    df = read_sql(
        f"""
with events as (
  select wfo, min(issue at time zone %s) as localissue,
  extract(year from issue) as year,
  phenomena, significance, eventid from warnings
  where {pstr} {wfo_limiter} and
  issue >= %s and issue < %s GROUP by wfo, year, phenomena, significance,
  eventid
)

SELECT date(localissue), count(*) from events GROUP by date(localissue)
    """,
        pgconn,
        params=(
            tzname,
            sts - datetime.timedelta(days=2),
            ets + datetime.timedelta(days=2),
        ),
        index_col="date",
    )

    data = {}
    now = sts
    while now <= ets:
        data[now] = {"val": 0}
        now += datetime.timedelta(days=1)
    for date, row in df.iterrows():
        data[date] = {"val": row["count"]}
    if ctx["w"] == "wfo":
        title2 = "NWS %s [%s]" % (ctx["_nt"].sts[wfo]["name"], wfo)
        if wfo == "_ALL":
            title2 = "All NWS Offices"
    else:
        title2 = state_names[ctx["state"]]
    fig = calendar_plot(
        sts,
        ets,
        data,
        heatmap=(ctx["heatmap"] == "yes"),
        title=("Number of VTEC Events for %s by Local Calendar Date") %
        (title2, ),
        subtitle="Valid %s - %s for %s" %
        (sts.strftime("%d %b %Y"), ets.strftime("%d %b %Y"), title),
    )
    return fig, df
Example #32
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx['station']
    varname = ctx['var']
    network = 'RAOB'
    ts = ctx['date']
    hour = int(ctx['hour'])
    ts = datetime.datetime(ts.year, ts.month, ts.day, hour)
    ts = ts.replace(tzinfo=pytz.timezone("UTC"))
    which = ctx['which']
    vlimit = ''
    if which == 'month':
        vlimit = (" and extract(month from f.valid) = %s ") % (ts.month, )
    nt = NetworkTable(network)
    name = nt.sts[station]['name']
    stations = [
        station,
    ]
    if station.startswith("_"):
        name = nt.sts[station]['name'].split("--")[0]
        stations = nt.sts[station]['name'].split("--")[1].strip().split(",")
    pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******')

    df = read_sql("""
    with data as (
        select f.valid, p.pressure, count(*) OVER (PARTITION by p.pressure),
        min(valid) OVER () as min_valid, max(valid) OVER () as max_valid,
        p.tmpc,
        rank() OVER (PARTITION by p.pressure ORDER by p.tmpc ASC) as tmpc_rank,
        min(p.tmpc) OVER (PARTITION by p.pressure) as tmpc_min,
        max(p.tmpc) OVER (PARTITION by p.pressure) as tmpc_max,
        p.dwpc,
        rank() OVER (PARTITION by p.pressure ORDER by p.dwpc ASC) as dwpc_rank,
        min(p.dwpc) OVER (PARTITION by p.pressure) as dwpc_min,
        max(p.dwpc) OVER (PARTITION by p.pressure) as dwpc_max,
        p.height as hght,
        rank() OVER (
            PARTITION by p.pressure ORDER by p.height ASC) as hght_rank,
        min(p.height) OVER (PARTITION by p.pressure) as hght_min,
        max(p.height) OVER (PARTITION by p.pressure) as hght_max,
        p.smps,
        rank() OVER (PARTITION by p.pressure ORDER by p.smps ASC) as smps_rank,
        min(p.smps) OVER (PARTITION by p.pressure) as smps_min,
        max(p.smps) OVER (PARTITION by p.pressure) as smps_max
        from raob_flights f JOIN raob_profile p on (f.fid = p.fid)
        WHERE f.station in %s
        and extract(hour from f.valid at time zone 'UTC') = %s
        """ + vlimit + """
        and p.pressure in (925, 850, 700, 500, 400, 300, 250, 200,
        150, 100, 70, 50, 10))

    select * from data where valid = %s ORDER by pressure DESC
    """,
                  pgconn,
                  params=(tuple(stations), hour, ts),
                  index_col='pressure')
    if len(df.index) == 0:
        raise Exception("No data found for query")
    for key in PDICT3.keys():
        df[key + '_percentile'] = df[key + '_rank'] / df['count'] * 100.

    ax = plt.axes([0.1, 0.12, 0.65, 0.75])
    bars = ax.barh(range(len(df.index)),
                   df[varname + '_percentile'],
                   align='center')
    y2labels = []
    fmt = '%.1f' if varname not in [
        'hght',
    ] else '%.0f'
    for i, bar in enumerate(bars):
        ax.text(bar.get_width() + 1,
                i,
                '%.1f' % (bar.get_width(), ),
                va='center',
                bbox=dict(color='white'))
        y2labels.append((fmt + ' (' + fmt + ' ' + fmt + ')') %
                        (df.iloc[i][varname], df.iloc[i][varname + "_min"],
                         df.iloc[i][varname + "_max"]))
    ax.set_yticks(range(len(df.index)))
    ax.set_yticklabels(['%.0f' % (a, ) for a in df.index.values])
    ax.set_ylim(-0.5, len(df.index) - 0.5)
    ax.set_xlabel("Percentile [100 = highest]")
    ax.set_ylabel("Mandatory Pressure Level (hPa)")
    plt.gcf().text(
        0.5,
        0.9, ("%s %s %s Sounding\n"
              "(%s-%s) Percentile Ranks (%s) for %s") %
        (station, name, ts.strftime("%Y/%m/%d %H UTC"),
         df.iloc[0]['min_valid'].year, df.iloc[0]['max_valid'].year,
         ("All Year" if which == 'none' else calendar.month_name[ts.month]),
         PDICT3[varname]),
        ha='center',
        va='bottom')
    ax.grid(True)
    ax.set_xticks([0, 5, 10, 25, 50, 75, 90, 95, 100])
    ax.set_xlim(0, 110)
    ax.text(1.02, 1, 'Ob  (Min  Max)', transform=ax.transAxes)

    ax2 = ax.twinx()
    ax2.set_ylim(-0.5, len(df.index) - 0.5)
    ax2.set_yticks(range(len(df.index)))
    ax2.set_yticklabels(y2labels)
    return plt.gcf(), df
Example #33
def make_plot(form):
    """Make the plot"""
    uniqueid = form.getfirst('site', 'ISUAG').split("::")[0]

    pgconn = get_dbconn('sustainablecorn')
    viewopt = form.getfirst('view', 'plot')
    varname = form.getfirst('varname', 'WAT2')
    df = read_sql("""
    SELECT uniqueid, plotid, valid at time zone 'UTC' as v, value
    from waterquality_data WHERE uniqueid = %s
    and varname = %s ORDER by valid ASC
    """,
                  pgconn,
                  params=(uniqueid, varname))

    if viewopt not in ['plot', 'js']:
        newcolname = "%s, %s" % (VARDICT[varname]['title'],
                                 VARDICT[varname]['units'])
        df.rename(columns=dict(v='timestamp', value=newcolname), inplace=True)
        df = add_bling(pgconn, df, 'Water')
        if viewopt == 'html':
            ssw("Content-type: text/html\n\n")
            ssw(df.to_html(index=False))
            return
        if viewopt == 'csv':
            ssw('Content-type: application/octet-stream\n')
            ssw(('Content-Disposition: attachment; '
                 'filename=%s.csv\n\n') % (uniqueid, ))
            ssw(df.to_csv(index=False))
            return
        if viewopt == 'excel':
            ssw('Content-type: application/octet-stream\n')
            ssw(('Content-Disposition: attachment; '
                 'filename=%s.xlsx\n\n') % (uniqueid, ))
            writer = pd.ExcelWriter('/tmp/ss.xlsx')
            df.to_excel(writer, 'Data', index=False)
            worksheet = writer.sheets['Data']
            worksheet.freeze_panes(3, 0)
            writer.save()
            ssw(open('/tmp/ss.xlsx', 'rb').read())
            os.unlink('/tmp/ss.xlsx')
            return

    # Begin highcharts output
    ssw("Content-type: application/javascript\n\n")
    title = ("Water Quality for Site: %s") % (uniqueid, )
    splots = []
    plot_ids = df['plotid'].unique()
    plot_ids.sort()
    df['ticks'] = df['v'].astype(np.int64) // 10**6
    for plotid in plot_ids:
        df2 = df[df['plotid'] == plotid]
        splots.append(
            ("""{type: 'scatter',
            name: '""" + plotid + """',
            data: """ +
             str([[a, b]
                  for a, b in zip(df2['ticks'].values, df2['value'].values)]) +
             """
        }""").replace("None", "null").replace("nan", "null"))
    series = ",".join(splots)
    ssw("""
$("#hc").highcharts({
    title: {text: '""" + title + """'},
    chart: {zoomType: 'x'},
    yAxis: {title: {text: '""" + VARDICT[varname]["title"] + """ """ +
        VARDICT[varname]["units"] + """'}
    },
    plotOptions: {line: {turboThreshold: 0}
    },
    xAxis: {
        type: 'datetime'
    },
    tooltip: {
        pointFormat: 'date: <b>{point.x:%b %e %Y, %H:%M}</b><br/>value: <b>{point.y}</b><br/>',
        shared: true,
        valueDecimals: 2,
        valueSuffix: '""" + VARDICT[varname]["units"] + """'
    },
    series: [""" + series + """]
});
    """)
Example #34
get_ipython().magic('matplotlib inline')
plt.plot(res[:, 0], res[:, 1], 'ro')
plt.grid(True); plt.xlim(-0.5, 4.5); plt.ylim(-4.5, 0.5)

"""
### 7.2.2 From SQL to pandas
Reading an entire table or a query result with pandas is usually more efficient.
When the whole table fits into memory, analytical queries typically run much
faster than with a disk-based SQL approach.

In summary, the 50 MB simulated data set supports the following conclusions:
    writing the data to SQLite3 takes several seconds, while pandas needs less than 1s
    reading the data back from the SQL database takes several seconds, while pandas again needs less than 1s
"""
# the pandas.io.sql submodule contains functions for handling data stored in SQL databases
import pandas.io.sql as pds
%time data = pds.read_sql('SELECT * FROM numbers', con)
data.head()

# once in memory, the data can be analyzed much faster
%time  data[(data['No1'] > 0) & (data['No2'] < 0)].head()
# pandas can handle more complex queries, but it cannot replace deeply structured relational data models
%time res = data[['No1', 'No2']][((data['No1'] > 0.5) | (data['No1'] < -0.5)) & ((data['No2'] < -1) | (data['No2'] > 1))]
plt.plot(res.No1, res.No2, 'ro')
plt.grid(True); plt.axis('tight')

"""
As expected, using the in-memory analytics capabilities of pandas gives a
substantial speedup, as long as pandas can replicate the corresponding SQL
statement. This is not an advantage of pandas alone; it is achieved through the
tight integration of pandas with PyTables.
"""

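A minimal timing sketch of the comparison described above, assuming a SQLite file numbers.db containing a numbers table with columns No1 and No2 (file and table names are placeholders):

import time
import sqlite3
import pandas.io.sql as pds

con = sqlite3.connect('numbers.db')

t0 = time.time()
data = pds.read_sql('SELECT * FROM numbers', con)    # one bulk read from disk
t1 = time.time()
res = data[(data['No1'] > 0) & (data['No2'] < 0)]    # in-memory filter
t2 = time.time()
print('read_sql: %.3fs  in-memory filter: %.3fs' % (t1 - t0, t2 - t1))
con.close()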
Example #35
def get_df(ctx):
    """Figure out what data we need to fetch here"""
    date = ctx['date']
    # Rectify to Sunday
    if date.isoweekday() < 7:
        date = date - datetime.timedelta(days=date.isoweekday())
    varname = ctx['var']
    pgconn = get_dbconn('coop')
    params = LOOKUP[varname]
    if isinstance(params[2], list):
        dlimit = " unit_desc in %s" % (str(tuple(params[2]), ))
    else:
        dlimit = " unit_desc = '%s' " % (params[2], )
    df = read_sql("""
        select year, week_ending,
        sum(num_value) as value, state_alpha from nass_quickstats
        where commodity_desc = %s and statisticcat_desc = %s
        and """ + dlimit + """ and
        util_practice_desc = %s
        and num_value is not null
        GROUP by year, week_ending, state_alpha
        ORDER by state_alpha, week_ending
    """,
                  pgconn,
                  params=(params[0], params[1], params[3]),
                  index_col=None)
    if df.empty:
        raise NoDataFound("No NASS Data was found for query, sorry.")
    df['week_ending'] = pd.to_datetime(df['week_ending'])
    data = {}
    # Average at least ten years
    syear = max([1981, date.year - 10])
    eyear = syear + 10
    for state, gdf in df.groupby('state_alpha'):
        sdf = gdf.copy()
        sdf.set_index('week_ending', inplace=True)
        # TOO DIFFICULT to know what to do in this case.
        if date.strftime("%Y-%m-%d") not in sdf.index:
            continue
        thisval = sdf.loc[date.strftime("%Y-%m-%d")]['value']
        # linear interpolate data to get comparables
        newdf = sdf.resample('D').interpolate(method='linear')
        # get doy averages
        y10 = newdf[(newdf['year'] >= syear) & (newdf['year'] < eyear)]
        if y10.empty:
            avgval = None
        else:
            doyavgs = y10.groupby(y10.index.strftime("%m%d")).mean()
            if date.strftime("%m%d") in doyavgs.index:
                avgval = doyavgs.at[date.strftime("%m%d"), 'value']
            else:
                avgval = None
        data[state] = {'avg': avgval, 'thisval': thisval}
    ctx['df'] = pd.DataFrame.from_dict(data, orient='index')
    if ctx['df'].empty:
        raise NoDataFound("No Data Found.")
    ctx['df'].dropna(how='all', inplace=True)
    ctx['df'].index.name = 'state'
    ctx['df']['departure'] = ctx['df']['thisval'] - ctx['df']['avg']
    ctx['title'] = ("%s USDA NASS %s" %
                    (date.strftime("%-d %B %Y"), PDICT[varname]))
    ctx['subtitle'] = ('Top value is %i percentage, bottom value is '
                       'departure from %i-%i avg' %
                       (date.year, syear, eyear - 1))
Example #36
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn('coop')

    station = fdict.get('station', 'IA0200')

    table = "alldata_%s" % (station[:2], )
    nt = NetworkTable("%sCLIMATE" % (station[:2], ))
    df = read_sql("""
    with events as (
        SELECT c.climoweek, a.precip, a.year from """ + table + """ a
        JOIN climoweek c on (c.sday = a.sday) WHERE a.station = %s
        and precip >= 0.01),
    ranks as (
        SELECT climoweek, year,
        rank() OVER (PARTITION by climoweek ORDER by precip DESC)
        from events),
    stats as (
    SELECT climoweek, max(precip), avg(precip),
    sum(case when precip >= 0.01 and precip < 0.26 then 1 else 0 end) as cat1,
    sum(case when precip >= 0.26 and precip < 0.51 then 1 else 0 end) as cat2,
    sum(case when precip >= 0.51 and precip < 1.01 then 1 else 0 end) as cat3,
    sum(case when precip >= 1.01 and precip < 2.01 then 1 else 0 end) as cat4,
    sum(case when precip >= 2.01 then 1 else 0 end) as cat5,
    count(*) from events GROUP by climoweek)
    SELECT e.climoweek, e.max, r.year, e.avg, e.cat1, e.cat2, e.cat3, e.cat4,
    e.cat5 from
    stats e JOIN ranks r on (r.climoweek = e.climoweek) WHERE r.rank = 1
    ORDER by e.climoweek ASC
    """,
                  pgconn,
                  params=(station, ),
                  index_col=None)

    res = """\
# IEM Climodat https://mesonet.agron.iastate.edu/climodat/
# Report Generated: %s
# Climate Record: %s -> %s
# Site Information: [%s] %s
# Contact Information: Daryl Herzmann [email protected] 515.294.5978
# Based on climoweek periods, this report summarizes liquid precipitation.
#                                     Number of precip events - (%% of total)
 CL                MAX         MEAN   0.01-    0.26-    0.51-    1.01-            TOTAL
 WK TIME PERIOD    VAL  YR     RAIN     0.25     0.50     1.00     2.00    >2.01  DAYS
""" % (datetime.date.today().strftime("%d %b %Y"),
       nt.sts[station]['archive_begin'].date(), datetime.date.today(), station,
       nt.sts[station]['name'])

    annEvents = 0
    cat1t = 0
    cat2t = 0
    cat3t = 0
    cat4t = 0
    cat5t = 0
    maxRain = 0
    totRain = 0
    lastcw = 0
    for _, row in df.iterrows():
        cw = int(row["climoweek"])
        # Skip ties
        if cw == lastcw:
            continue
        lastcw = cw
        cat1 = row["cat1"]
        cat2 = row["cat2"]
        cat3 = row["cat3"]
        cat4 = row["cat4"]
        cat5 = row["cat5"]
        cat1t += cat1
        cat2t += cat2
        cat3t += cat3
        cat4t += cat4
        cat5t += cat5
        maxval = row["max"]
        if maxval > maxRain:
            maxRain = maxval
        meanval = row["avg"]
        totEvents = cat1 + cat2 + cat3 + cat4 + cat5
        annEvents += totEvents
        totRain += (totEvents * meanval)

        res += ("%3s %-13s %5.2f %i   %4.2f %4i(%2i) %4i(%2i) "
                "%4i(%2i) %4i(%2i) %4i(%2i)   %4i\n") % (
                    cw, CWEEK[cw], maxval, row['year'], meanval, cat1,
                    round((float(cat1) / float(totEvents)) * 100.0), cat2,
                    round((float(cat2) / float(totEvents)) * 100.0), cat3,
                    round((float(cat3) / float(totEvents)) * 100.0), cat4,
                    round((float(cat4) / float(totEvents)) * 100.0), cat5,
                    round((float(cat5) / float(totEvents)) * 100.0), totEvents)

    res += ("%-17s %5.2f        %4.2f %4i(%2i) %4i(%2i) "
            "%4i(%2i) %4i(%2i) %4i(%2i)  %5i\n") % (
                "ANNUAL TOTALS", maxRain, totRain / annEvents, cat1t,
                (float(cat1t) / float(annEvents)) * 100, cat2t,
                (float(cat2t) / float(annEvents)) * 100, cat3t,
                (float(cat3t) / float(annEvents)) * 100, cat4t,
                (float(cat4t) / float(annEvents)) * 100, cat5t,
                (float(cat5t) / float(annEvents)) * 100, annEvents)

    return None, df, res
Example #37
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx['station']
    network = ctx['network']
    year = ctx['year']
    season = ctx['season']
    nt = NetworkTable(network)
    table = "alldata_%s" % (station[:2], )

    pgconn = psycopg2.connect(database='coop', host='iemdb', user='******')

    # Have to do a redundant query to get the running values
    obs = read_sql("""
    WITH trail as (
        SELECT day, year,
        avg((high+low)/2.) OVER (ORDER by day ASC ROWS 91 PRECEDING) as avgt
        from """ + table + """ WHERE station = %s)

    SELECT day, avgt from trail WHERE year between %s and %s ORDER by day ASC
    """,
                   pgconn,
                   params=(station, year, year + 2),
                   index_col='day')

    df = read_sql("""
    WITH trail as (
        SELECT day, year,
        avg((high+low)/2.) OVER (ORDER by day ASC ROWS 91 PRECEDING) as avgt
        from """ + table + """ WHERE station = %s),
    extremes as (
        SELECT day, year, avgt,
        rank() OVER (PARTITION by year ORDER by avgt ASC) as minrank,
        rank() OVER (PARTITION by year ORDER by avgt DESC) as maxrank
        from trail),
    yearmax as (
        SELECT year, min(day) as summer_end, min(avgt) as summer
        from extremes where maxrank = 1 GROUP by year),
    yearmin as (
        SELECT year, min(day) as winter_end, min(avgt) as winter
        from extremes where minrank = 1 GROUP by year)

    SELECT x.year, winter_end, winter, summer_end, summer,
    extract(doy from winter_end)::int as winter_end_doy,
    extract(doy from summer_end)::int as summer_end_doy
    from yearmax x JOIN yearmin n on (x.year = n.year) ORDER by x.year ASC
    """,
                  pgconn,
                  params=(station, ),
                  index_col='year')
    # Throw out spring of the first year
    for col in ['winter', 'winter_end_doy', 'winter_end']:
        df.at[df.index.min(), col] = None

    # Need to cull current year
    if datetime.date.today().month < 8:
        for col in ['summer', 'summer_end_doy', 'summer_end']:
            df.at[datetime.date.today().year, col] = None
    if datetime.date.today().month < 2:
        for col in ['winter', 'winter_end_doy', 'winter_end']:
            df.at[datetime.date.today().year, col] = None
    df['spring_length'] = df['summer_end_doy'] - 91 - df['winter_end_doy']
    # fall is a bit trickier
    df['fall_length'] = None
    df['fall_length'].values[:-1] = ((df['winter_end_doy'].values[1:] + 365) -
                                     91 - df['summer_end_doy'].values[:-1])

    (fig, ax) = plt.subplots(3, 1, figsize=(8, 9))

    ax[0].plot(obs.index.values, obs['avgt'].values)
    ax[0].set_title(("%s-%s [%s] %s\n91 Day Average Temperatures") %
                    (nt.sts[station]['archive_begin'].year, year + 3, station,
                     nt.sts[station]['name']))
    ax[0].set_ylabel(r"Trailing 91 Day Avg T $^{\circ}$F")
    ax[0].xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax[0].grid(True)

    # Label the maxes and mins
    for yr in range(year, year + 3):
        if yr not in df.index:
            continue
        date = df.at[yr, 'winter_end']
        val = df.at[yr, 'winter']
        if date is not None:
            ax[0].text(date,
                       val - 5,
                       r"%s %.1f$^\circ$F" % (date.strftime("%-d %b"), val),
                       ha='center')
        date = df.at[yr, 'summer_end']
        val = df.at[yr, 'summer']
        if date is not None:
            ax[0].text(date,
                       val + 2,
                       r"%s %.1f$^\circ$F" % (date.strftime("%-d %b"), val),
                       ha='center')

    df2 = df.dropna()
    p2col = 'winter_end_doy' if season == 'spring' else 'summer_end_doy'
    slp, intercept, r, _, _ = stats.linregress(df2.index.values,
                                               df2[p2col].values)
    ax[1].scatter(df.index.values, df[p2col].values)
    ax[1].grid(True)
    # Do labelling
    yticks = []
    yticklabels = []
    for doy in range(int(df[p2col].min()), int(df[p2col].max())):
        date = datetime.date(2000, 1, 1) + datetime.timedelta(days=(doy - 1))
        if date.day in [1, 15]:
            yticks.append(doy)
            yticklabels.append(date.strftime("%-d %b"))
    ax[1].set_yticks(yticks)
    ax[1].set_yticklabels(yticklabels)
    lbl = ("Date of Minimum (Spring Start)"
           if season == 'spring' else "Date of Maximum (Fall Start)")
    ax[1].set_ylabel(lbl)
    ax[1].set_xlim(df.index.min() - 1, df.index.max() + 1)
    avgv = df[p2col].mean()
    ax[1].axhline(avgv, color='r')
    ax[1].plot(df.index.values, intercept + (df.index.values * slp))
    d = (datetime.date(2000, 1, 1) +
         datetime.timedelta(days=int(avgv))).strftime("%-d %b")
    ax[1].text(0.02,
               0.02,
               r"$\frac{\Delta days}{decade} = %.2f,R^2=%.2f, avg = %s$" %
               (slp * 10.0, r**2, d),
               va='bottom',
               transform=ax[1].transAxes)
    ax[1].set_ylim(bottom=(ax[1].get_ylim()[0] - 10))

    p3col = 'spring_length' if season == 'spring' else 'fall_length'
    slp, intercept, r, _, _ = stats.linregress(df2.index.values, df2[p3col])
    ax[2].scatter(df.index.values, df[p3col])
    ax[2].set_xlim(df.index.min() - 1, df.index.max() + 1)
    ax[2].set_ylabel("Length of '%s' [days]" % (season.capitalize(), ))
    ax[2].grid(True)
    avgv = df[p3col].mean()
    ax[2].axhline(avgv, color='r')
    ax[2].plot(df.index.values, intercept + (df.index.values * slp))
    ax[2].text(0.02,
               0.02,
               r"$\frac{\Delta days}{decade} = %.2f,R^2=%.2f, avg = %.1fd$" %
               (slp * 10.0, r**2, avgv),
               va='bottom',
               transform=ax[2].transAxes)
    ax[2].set_ylim(bottom=(ax[2].get_ylim()[0] - 15))

    return fig, df
Example #38
0
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn('coop')
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx['station']
    delta = ctx['delta']
    year = ctx['year']
    nt = network.Table("%sCLIMATE" % (station[:2], ))

    table = "alldata_%s" % (station[:2], )
    df = read_sql("""
        WITH days as (
            select generate_series('%s-01-01'::date, '%s-12-31'::date,
                '1 day'::interval)::date as day,
                to_char(generate_series('%s-01-01'::date, '%s-12-31'::date,
                '1 day'::interval)::date, 'mmdd') as sday
        ),
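        -- the generate_series calendar guarantees a row for every day of the
        -- year, so days with missing observations still appear (LEFT JOIN below)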
        climo as (
            SELECT sday, avg(high) as avg_high, stddev(high) as stddev_high,
            avg(low) as avg_low, stddev(low) as stddev_low from """ + table +
                  """
            WHERE station = %s GROUP by sday
        ),
        thisyear as (
            SELECT day, sday, high, low from """ + table + """
            WHERE station = %s and year = %s
        ),
        thisyear2 as (
            SELECT d.day, d.sday, t.high, t.low from days d LEFT JOIN
            thisyear t on (d.sday = t.sday)
        )
        SELECT t.day, t.sday, t.high, t.low, c.avg_high, c.avg_low,
        c.stddev_high, c.stddev_low from thisyear2 t JOIN climo c on
        (t.sday = c.sday) ORDER by t.day ASC
    """,
                  pgconn,
                  params=(year, year, year, year, station, station, year),
                  index_col='day')
    df.index.name = 'Date'
    df['high_sigma'] = (df['high'] - df['avg_high']) / df['stddev_high']
    df['low_sigma'] = (df['low'] - df['avg_low']) / df['stddev_low']

    (fig, ax) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))

    ax[0].plot(df.index,
               df.avg_high,
               color='r',
               linestyle='-',
               label='Climate High')
    ax[0].plot(df.index, df.avg_low, color='b', label='Climate Low')
    ax[0].set_ylabel(r"Temperature $^\circ\mathrm{F}$")
    ax[0].set_title("[%s] %s Climatology & %s Observations" %
                    (station, nt.sts[station]['name'], year))

    ax[0].plot(df.index, df.high, color='brown', label='%s High' % (year, ))
    ax[0].plot(df.index, df.low, color='green', label='%s Low' % (year, ))

    if delta == 'abs':
        ax[1].plot(df.index,
                   df.high - df.avg_high,
                   color='r',
                   label='High Diff %s - Climate' % (year))
        ax[1].plot(df.index, df.low - df.avg_low, color='b', label='Low Diff')
        ax[1].set_ylabel(r"Temp Difference $^\circ\mathrm{F}$")
    else:
        ax[1].plot(df.index,
                   df.high_sigma,
                   color='r',
                   label='High Diff %s - Climate' % (year))
        ax[1].plot(df.index, df.low_sigma, color='b', label='Low Diff')
        ax[1].set_ylabel(r"Temp Difference $\sigma$")
        ymax = max([df.high_sigma.abs().max(), df.low_sigma.abs().max()]) + 1
        ax[1].set_ylim(0 - ymax, ymax)
    ax[1].legend(fontsize=10, ncol=2, loc='best')
    ax[1].grid(True)

    ax[0].legend(fontsize=10, ncol=2, loc=8)
    ax[0].grid()
    ax[0].xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax[0].xaxis.set_major_formatter(mdates.DateFormatter('%-d\n%b'))
    ax[0].set_xlim(df.index.min() - datetime.timedelta(days=3),
                   df.index.max() + datetime.timedelta(days=3))

    return fig, df
Example #39
0
def grid_hour(ts):
    """
    I proctor the gridding of data on an hourly basis
    @param ts Timestamp of the analysis, we'll consider a 20 minute window
    """
    pprint("grid_hour called...")
    nc = ncopen(iemre.get_hourly_ncname(ts.year), "a", timeout=300)
    domain = nc.variables["hasdata"][:, :]
    nc.close()
    ts0 = ts - datetime.timedelta(minutes=10)
    ts1 = ts + datetime.timedelta(minutes=10)
    utcnow = datetime.datetime.utcnow()
    utcnow = utcnow.replace(tzinfo=pytz.utc) - datetime.timedelta(hours=36)
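    # observations older than 36 hours are assumed to have migrated from the
    # realtime IEMAccess tables into the long-term ASOS archive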

    # If we are near realtime, look in IEMAccess instead of ASOS database
    mybuf = 2.0
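    # the ten coordinates below trace a closed, buffered bounding box
    # (SW, NW, NE, SE, back to SW) for the ST_Contains polygon filter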
    params = (
        iemre.WEST - mybuf,
        iemre.SOUTH - mybuf,
        iemre.WEST - mybuf,
        iemre.NORTH + mybuf,
        iemre.EAST + mybuf,
        iemre.NORTH + mybuf,
        iemre.EAST + mybuf,
        iemre.SOUTH - mybuf,
        iemre.WEST - mybuf,
        iemre.SOUTH - mybuf,
        ts0,
        ts1,
    )
    if utcnow < ts:
        dbconn = get_dbconn("iem", user="******")
        sql = """SELECT t.id as station, ST_x(geom) as lon,
        ST_y(geom) as lat,
 max(case when tmpf > -60 and tmpf < 130 THEN tmpf else null end) as max_tmpf,
 max(case when sknt > 0 and sknt < 100 then sknt else 0 end) as max_sknt,
 max(getskyc(skyc1)) as max_skyc1,
 max(getskyc(skyc2)) as max_skyc2,
 max(getskyc(skyc3)) as max_skyc3,
 max(case when phour > 0 and phour < 1000 then phour else 0 end) as phour,
 max(case when dwpf > -60 and dwpf < 100 THEN dwpf else null end) as max_dwpf,
 max(case when sknt >= 0 then sknt else 0 end) as sknt,
 max(case when sknt >= 0 then drct else 0 end) as drct
 from current_log s JOIN stations t on (s.iemid = t.iemid)
 WHERE ST_Contains(
  ST_GeomFromEWKT('SRID=4326;POLYGON((%s %s, %s  %s, %s %s, %s %s, %s %s))'),
  geom) and valid >= %s and valid < %s GROUP by station, lon, lat
         """
    else:
        dbconn = get_dbconn("asos", user="******")
        sql = """SELECT station, ST_x(geom) as lon, st_y(geom) as lat,
 max(case when tmpf > -60 and tmpf < 130 THEN tmpf else null end) as max_tmpf,
 max(case when sknt > 0 and sknt < 100 then sknt else 0 end) as max_sknt,
 max(getskyc(skyc1)) as max_skyc1,
 max(getskyc(skyc2)) as max_skyc2,
 max(getskyc(skyc3)) as max_skyc3,
 max(case when p01i > 0 and p01i < 1000 then p01i else 0 end) as phour,
 max(case when dwpf > -60 and dwpf < 100 THEN dwpf else null end) as max_dwpf,
 max(case when sknt >= 0 then sknt else 0 end) as sknt,
 max(case when sknt >= 0 then drct else 0 end) as drct
 from alldata a JOIN stations t on (a.station = t.id) WHERE
 ST_Contains(
  ST_GeomFromEWKT('SRID=4326;POLYGON((%s %s, %s  %s, %s %s, %s %s, %s %s))'),
  geom) and (t.network ~* 'ASOS' or t.network = 'AWOS') and
 valid >= %s and valid < %s GROUP by station, lon, lat"""

    df = read_sql(sql, dbconn, params=params, index_col="station")
    pprint("got database results")
    if df.empty:
        print(("%s has no entries, FAIL") % (ts.strftime("%Y-%m-%d %H:%M"), ))
        return
    ures, vres = grid_wind(df, domain)
    pprint("grid_wind is done")
    if ures is None:
        print("iemre.hourly_analysis failure for uwnd at %s" % (ts, ))
    else:
        write_grid(ts, "uwnd", ures)
        write_grid(ts, "vwnd", vres)

    tmpf = generic_gridder(df, "max_tmpf", domain)
    pprint("grid tmpf is done")
    if tmpf is None:
        print("iemre.hourly_analysis failure for tmpk at %s" % (ts, ))
    else:
        dwpf = generic_gridder(df, "max_dwpf", domain)
        pprint("grid dwpf is done")
        # require that dwpk <= tmpk
        mask = ~np.isnan(dwpf)
        mask[mask] &= dwpf[mask] > tmpf[mask]
        dwpf = np.where(mask, tmpf, dwpf)
        write_grid(ts, "tmpk",
                   masked_array(tmpf, data_units="degF").to("degK"))
        write_grid(ts, "dwpk",
                   masked_array(dwpf, data_units="degF").to("degK"))

    res = grid_skyc(df, domain)
    pprint("grid skyc is done")
    if res is None:
        print("iemre.hourly_analysis failure for skyc at %s" % (ts, ))
    else:
        write_grid(ts, "skyc", res)
Example #40
0
def make_daily_water_change_plot(ctx):
    """Option 7"""
    # Get daily precip
    pdf = read_sql("""
    SELECT valid, rain_mm_tot_qc from sm_daily where station = %s
    and valid >= %s and valid <= %s ORDER by valid ASC
    """,
                   ctx['pgconn'],
                   params=(ctx['station'], ctx['sts'].date(),
                           ctx['ets'].date()),
                   index_col='valid')

    df = read_sql("""
    WITH obs as (
        SELECT valid,
    CASE WHEN t12_c_avg_qc > 1 then calc_vwc_12_avg_qc else null end as v12,
    CASE WHEN t24_c_avg_qc > 1 then calc_vwc_24_avg_qc else null end as v24,
    CASE WHEN t50_c_avg_qc > 1 then calc_vwc_50_avg_qc else null end as v50
        from sm_daily
        where station = %s and valid >= %s and valid < %s)

    SELECT valid,
    v12, v12 - lag(v12) OVER (ORDER by valid ASC) as v12_delta,
    v24, v24 - lag(v24) OVER (ORDER by valid ASC) as v24_delta,
    v50, v50 - lag(v50) OVER (ORDER by valid ASC) as v50_delta
    from obs ORDER by valid ASC
    """,
                  ctx['pgconn'],
                  params=(ctx['station'], ctx['sts'], ctx['ets']),
                  index_col=None)
    # df.interpolate(inplace=True, axis=1, method='nearest')
    l1 = 12.
    l2 = 12.
    l3 = 0.
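    # assumed soil-layer thicknesses (inches) represented by the 12", 24" and
    # 50" sensors; the 50" layer is weighted zero in the 6-30" water total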
    df['change'] = (df['v12_delta'] * l1 + df['v24_delta'] * l2 +
                    df['v50_delta'] * l3)
    df['depth'] = df['v12'] * l1 + df['v24'] * l2 + df['v50'] * l3

    (fig, ax) = plt.subplots(2, 1, sharex=True)
    if not df['depth'].isnull().all():
        ax[0].plot(df['valid'].values, df['depth'], color='b', lw=2)
    oneday = datetime.timedelta(days=1)
    for level in [0.15, 0.25, 0.35, 0.45]:
        ax[0].axhline((l1 + l2 + l3) * level, c='k')
        ax[0].text(df['valid'].values[-1] + oneday, (l1 + l2 + l3) * level,
                   "  %.0f%%" % (level * 100., ),
                   va='center')
    ax[0].grid(True)
    ax[0].set_ylabel("Water Depth [inch]")
    ax[0].set_title(
        ("ISUSM Station: %s Daily Soil (6-30\") Water\n"
         "For un-frozen condition between %s and %s") %
        (ctx['nt'].sts[ctx['station']]['name'],
         ctx['sts'].strftime("%-d %b %Y"), ctx['ets'].strftime("%-d %b %Y")))
    bars = ax[1].bar(df['valid'].values, df['change'].values, fc='b', ec='b')
    for mybar in bars:
        if mybar.get_y() < 0:
            mybar.set_facecolor('r')
            mybar.set_edgecolor('r')
    ax[1].set_ylabel("Soil Water Change [inch]")
    ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%-d %b\n%Y'))
    interval = int(len(df.index) / 7 + 1)
    ax[1].xaxis.set_major_locator(mdates.DayLocator(interval=interval))
    ax[1].grid(True)

    if (ctx['ets'] - ctx['sts']).total_seconds() < (60 * 86400):
        ylim = ax[1].get_ylim()[1]
        # Attempt to place precip text above this plot
        pdf['pday'] = distance(pdf['rain_mm_tot_qc'].values, 'MM').value('IN')
        for valid, row in pdf.iterrows():
            if row['pday'] > 0:
                ax[1].text(valid,
                           ylim,
                           "%.2f" % (row['pday'], ),
                           rotation=90,
                           va='bottom',
                           color='b')
        ax[1].text(-0.01,
                   1.05,
                   "Rain ->",
                   ha='right',
                   transform=ax[1].transAxes,
                   color='b')
    ax[0].set_xlim(df['valid'].min() - oneday, df['valid'].max() + oneday)
    return fig, df
Example #41
0
def plot2(ctx):
    """Just soil temps"""
    df = read_sql("""SELECT * from sm_hourly WHERE
        station = %s and valid BETWEEN %s and %s ORDER by valid ASC
        """,
                  ctx['pgconn'],
                  params=(ctx['station'], ctx['sts'], ctx['ets']),
                  index_col='valid')
    d12t = df['t12_c_avg_qc']
    d24t = df['t24_c_avg_qc']
    d50t = df['t50_c_avg_qc']
    tsoil = df['tsoil_c_avg_qc']
    valid = df.index.values

    # maxy = max([np.max(d12sm), np.max(d24sm), np.max(d50sm)])
    # miny = min([np.min(d12sm), np.min(d24sm), np.min(d50sm)])

    (fig, ax) = plt.subplots(1, 1)
    ax.grid(True)
    ax.set_title(("ISUSM Station: %s Timeseries\n"
                  "Soil Temperature at Depth\n ") %
                 (ctx['nt'].sts[ctx['station']]['name'], ))
    ax.plot(valid,
            temperature(tsoil, 'C').value('F'),
            linewidth=2,
            color='brown',
            label='4 inch')
    if not d12t.isnull().any():
        ax.plot(valid,
                temperature(d12t, 'C').value('F'),
                linewidth=2,
                color='r',
                label='12 inch')
    if not d24t.isnull().any():
        ax.plot(valid,
                temperature(d24t, 'C').value('F'),
                linewidth=2,
                color='purple',
                label='24 inch')
    if not d50t.isnull().any():
        ax.plot(valid,
                temperature(d50t, 'C').value('F'),
                linewidth=2,
                color='black',
                label='50 inch')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.9])
    ax.legend(bbox_to_anchor=(0.5, 1.02), ncol=4, loc='center', fontsize=12)
    days = (ctx['ets'] - ctx['sts']).days
    if days >= 3:
        interval = max(int(days / 7), 1)
        ax.xaxis.set_major_locator(
            mdates.DayLocator(interval=interval,
                              tz=pytz.timezone("America/Chicago")))
        ax.xaxis.set_major_formatter(
            mdates.DateFormatter('%-d %b\n%Y',
                                 tz=pytz.timezone("America/Chicago")))
    else:
        ax.xaxis.set_major_locator(
            mdates.AutoDateLocator(maxticks=10,
                                   tz=pytz.timezone("America/Chicago")))
        ax.xaxis.set_major_formatter(
            mdates.DateFormatter('%-I %p\n%d %b',
                                 tz=pytz.timezone("America/Chicago")))
    if ax.get_ylim()[0] < 40:
        ax.axhline(32, linestyle='--', lw=2, color='tan')
    ax.set_ylabel(r"Temperature $^\circ$F")
    return fig, df
Example #42
0
def plot1(ctx):
    """Do main plotting logic"""
    df = read_sql("""
        SELECT * from sm_hourly WHERE
        station = %s and valid BETWEEN %s and %s ORDER by valid ASC
    """,
                  ctx['pgconn'],
                  params=(ctx['station'], ctx['sts'], ctx['ets']),
                  index_col='valid')
    if df.empty:
        raise ValueError("No Data Found for This Plot.")
    slrkw = df['slrkw_avg_qc']
    d12sm = df['calc_vwc_12_avg_qc']
    d12t = df['t12_c_avg_qc']
    d24t = df['t24_c_avg_qc']
    d50t = df['t50_c_avg_qc']
    d24sm = df['calc_vwc_24_avg_qc']
    d50sm = df['calc_vwc_50_avg_qc']
    rain = df['rain_mm_tot_qc']
    tair = df['tair_c_avg_qc']
    tsoil = df['tsoil_c_avg_qc']
    valid = df.index.values

    (fig, ax) = plt.subplots(3, 1, sharex=True, figsize=(8, 8))
    ax[0].grid(True)
    ax2 = ax[0].twinx()
    ax[0].set_zorder(ax2.get_zorder() + 1)
    ax[0].patch.set_visible(False)
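    # raising ax[0] above ax2 keeps the soil-moisture lines on top of the
    # precip bars; hiding ax[0]'s background patch leaves the bars visible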
    # arange leads to funky values
    ax2.set_yticks([-0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0])
    ax2.set_yticklabels([0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0])
    ax2.set_ylim(-0.6, 0)
    ax2.set_ylabel("Hourly Precipitation [inch]")
    b1 = ax2.bar(valid, 0 - rain / 25.4, width=0.04, fc='b', ec='b', zorder=4)

    l1 = None
    l2 = None
    l3 = None
    if not d12sm.isnull().all():
        l1, = ax[0].plot(valid,
                         d12sm * 100.0,
                         linewidth=2,
                         color='r',
                         zorder=5)
    if not d24sm.isnull().all():
        l2, = ax[0].plot(valid,
                         d24sm * 100.0,
                         linewidth=2,
                         color='purple',
                         zorder=5)
    if not d50sm.isnull().all():
        l3, = ax[0].plot(valid,
                         d50sm * 100.0,
                         linewidth=2,
                         color='black',
                         zorder=5)
    ax[0].set_ylabel("Volumetric Soil Water Content [%]", fontsize=10)

    days = (ctx['ets'] - ctx['sts']).days
    if days >= 3:
        interval = max(int(days / 7), 1)
        ax[0].xaxis.set_major_locator(
            mdates.DayLocator(interval=interval,
                              tz=pytz.timezone("America/Chicago")))
        ax[0].xaxis.set_major_formatter(
            mdates.DateFormatter('%-d %b\n%Y',
                                 tz=pytz.timezone("America/Chicago")))
    else:
        ax[0].xaxis.set_major_locator(
            mdates.AutoDateLocator(maxticks=10,
                                   tz=pytz.timezone("America/Chicago")))
        ax[0].xaxis.set_major_formatter(
            mdates.DateFormatter('%-I %p\n%d %b',
                                 tz=pytz.timezone("America/Chicago")))

    ax[0].set_title(("ISUSM Station: %s Timeseries") %
                    (ctx['nt'].sts[ctx['station']]['name'], ))
    box = ax[0].get_position()
    ax[0].set_position(
        [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95])
    box = ax2.get_position()
    ax2.set_position(
        [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95])
    if None not in [l1, l2, l3]:
        ax[0].legend([l1, l2, l3, b1],
                     ['12 inch', '24 inch', '50 inch', 'Hourly Precip'],
                     bbox_to_anchor=(0.5, -0.15),
                     ncol=4,
                     loc='center',
                     fontsize=12)

    # ----------------------------------------
    if not d12t.isnull().all():
        ax[1].plot(valid,
                   temperature(d12t, 'C').value('F'),
                   linewidth=2,
                   color='r',
                   label='12in')
    if not d24t.isnull().all():
        ax[1].plot(valid,
                   temperature(d24t, 'C').value('F'),
                   linewidth=2,
                   color='purple',
                   label='24in')
    if not d50t.isnull().all():
        ax[1].plot(valid,
                   temperature(d50t, 'C').value('F'),
                   linewidth=2,
                   color='black',
                   label='50in')
    ax[1].grid(True)
    ax[1].set_ylabel(r"Temperature $^\circ$F")
    box = ax[1].get_position()
    ax[1].set_position(
        [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95])

    # ------------------------------------------------------

    ax2 = ax[2].twinx()
    l3, = ax2.plot(valid, slrkw, color='g', zorder=1, lw=2)
    ax2.set_ylabel("Solar Radiation [W/m^2]", color='g')

    l1, = ax[2].plot(valid,
                     temperature(tair, 'C').value('F'),
                     linewidth=2,
                     color='blue',
                     zorder=2)
    l2, = ax[2].plot(valid,
                     temperature(tsoil, 'C').value('F'),
                     linewidth=2,
                     color='brown',
                     zorder=2)
    ax[2].grid(True)
    ax[2].legend([l1, l2, l3], ['Air', '4" Soil', 'Solar Radiation'],
                 bbox_to_anchor=(0.5, 1.1),
                 loc='center',
                 ncol=3)
    ax[2].set_ylabel(r"Temperature $^\circ$F")

    ax[2].set_zorder(ax2.get_zorder() + 1)
    ax[2].patch.set_visible(False)
    ax[0].set_xlim(df.index.min(), df.index.max())
    return fig, df
Example #43
0
def plotter(fdict):
    """ Go """
    pgconn = util.get_dbconn('coop')
    ctx = util.get_autoplot_context(fdict, get_description())
    station = ctx['station']
    table = "alldata_%s" % (station[:2], )
    df = read_sql("""
    with data as (
        select day, high, year,
        rank() OVER (PARTITION by high ORDER by sday DESC)
        from """ + table + """ where station = %s)
    SELECT day, year, high, rank from data WHERE rank = 1
    ORDER by high DESC, day DESC
    """,
                  pgconn,
                  params=(station, ),
                  index_col=None)
    if df.empty:
        raise NoDataFound("No data found!")

    (fig, ax) = plt.subplots(1, 1, figsize=(6, 8))
    current = {
        'd2000': datetime.date(2000, 1, 1),
        'date': datetime.date(2000, 1, 1),
        'ties': False
    }
    x = []
    y = []
    for level in np.arange(df['high'].max(), 0, -1):
        if level not in df['high'].values:
            continue
        df2 = df[df['high'] == level]
        row = df2.iloc[0]
        if row['day'].replace(year=2000) > current['d2000']:
            current['d2000'] = row['day'].replace(year=2000)
            current['date'] = row['day']
            current['ties'] = (len(df2.index) > 1)
        if current['date'].month == 12 and current['date'].day > 20:
            break
        y.append(level)
        x.append(int(current['d2000'].strftime("%j")))
        ax.text(x[-1] + 3,
                level,
                "%s -- %s %s%s" %
                (level, current['d2000'].strftime("%-d %b"),
                 current['date'].year, " **" if current['ties'] else ""),
                va='center')
    ax.barh(y, x, align='center')
    ax.set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 365))
    ax.set_xticklabels(calendar.month_abbr[1:])
    ax.set_xlim(min(x) - 5, 400)
    ax.set_ylim(y[-1] - 1, y[0] + 1)
    ax.grid(True)
    ab = ctx['_nt'].sts[station]['archive_begin']
    if ab is None:
        raise NoDataFound("Unknown station metadata.")
    ax.set_title(
        ("Most Recent & Latest Date of High Temperature\n"
         "[%s] %s (%s-%s)") % (station, ctx['_nt'].sts[station]['name'],
                               ab.year, datetime.date.today().year))
    ax.set_ylabel(r"High Temperature $^\circ$F")
    ax.set_xlabel("** denotes ties")

    return fig, df
Example #44
0
File: p103.py Project: nbackas/iem
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = psycopg2.connect(database='coop', host='iemdb', user='******')

    station = fdict.get('station', 'IA2203')
    season = fdict.get('season', 'fall')
    table = "alldata_%s" % (station[:2],)
    nt = network.Table("%sCLIMATE" % (station[:2],))

    df = read_sql("""
    WITH obs as (
        SELECT day, year, month, high, low,
        case when month > 6 then 'fall' else 'spring' end as season
        from """ + table + """ WHERE station = %s),
    data as (
        SELECT year, day, season,
        max(high) OVER (PARTITION by year, season ORDER by day ASC
                        ROWS BETWEEN 183 PRECEDING and CURRENT ROW) as mh,
        min(low) OVER (PARTITION by year, season ORDER by day ASC
                       ROWS BETWEEN 183 PRECEDING and CURRENT ROW) as ml
        from obs),
    lows as (
        SELECT year, day, ml as level, season,
        rank() OVER (PARTITION by year, ml ORDER by day ASC) from data
        WHERE season = 'fall'),
    highs as (
        SELECT year, day, mh as level, season,
        rank() OVER (PARTITION by year, mh ORDER by day ASC) from data
        WHERE season = 'spring')

    (SELECT year, day, extract(doy from day) as doy,
     level, season from lows WHERE rank = 1) UNION
    (SELECT year, day, extract(doy from day) as doy,
     level, season from highs WHERE rank = 1)
    """, pgconn, params=[station])
    df2 = df[df['season'] == season]
    (fig, ax) = plt.subplots(3, 1, figsize=(7, 10))
    dyear = df2.groupby(['year']).count()
    ax[0].bar(dyear.index, dyear['level'], facecolor='tan', edgecolor='tan')
    ax[0].axhline(dyear['level'].mean(), lw=2)
    ax[0].set_ylabel("Yearly Events Avg: %.1f" % (dyear['level'].mean(), ))
    ax[0].set_xlim(dyear.index.min()-1, dyear.index.max()+1)
    title = "%s Steps %s" % (PDICT[season],
                             "Down" if season == 'fall' else 'Up')
    ax[0].set_title("%s [%s]\n%s in Temperature" % (nt.sts[station]['name'],
                                                    station, title))
    ax[0].grid(True)

    ax[1].hist(np.array(df2['level'], 'f'),
               bins=np.arange(df2['level'].min(),
                              df2['level'].max()+1, 2),
               normed=True, facecolor='tan')
    ax[1].set_ylabel("Probability Density")
    ax[1].axvline(32, lw=2)
    ax[1].grid(True)
    ax[1].set_xlabel(r"Temperature $^\circ$F, 32 degrees highlighted")

    ax[2].hist(np.array(df2['doy'], 'f'),
               bins=np.arange(df2['doy'].min(),
                              df2['doy'].max()+1, 3),
               normed=True, facecolor='tan')
    ax[2].set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274,
                      305, 335, 365))
    ax[2].set_xticklabels(calendar.month_abbr[1:])
    ax[2].set_xlim(df2['doy'].min() - 3,
                   df2['doy'].max() + 3)

    ax[2].set_ylabel("Probability Density")
    ax[2].grid(True)
    ax[2].set_xlabel("Day of Year, 3 Day Bins")

    return fig, df
Example #45
0
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn('coop')
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx['station']
    network = ctx['network']
    year1 = ctx.get('year1')
    year2 = ctx.get('year2')
    year3 = ctx.get('year3')
    nt = NetworkTable(network)
    table = "alldata_%s" % (station[:2], )
    df = read_sql("""
    WITH years as (SELECT distinct year from """ + table + """
        WHERE station = %s and sday = '0101')
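    -- only years with a Jan 1 observation are used, so each accumulation
    -- starts from the very beginning of the year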
    SELECT day, sday, year, precip,
    sum(precip) OVER (PARTITION by year ORDER by day ASC) as accum from
    """ + table + """ WHERE station = %s and year in (select year from years)
    ORDER by day ASC
    """,
                  pgconn,
                  params=(station, station),
                  index_col='day')
    if df.empty:
        raise ValueError("No data found!")

    (fig, ax) = plt.subplots(1, 1)
    # Average
    jday = df[['sday', 'accum']].groupby('sday').mean()
    ax.plot(range(1,
                  len(jday.index) + 1),
            jday['accum'],
            lw=2,
            zorder=5,
            color='k',
            label='Average - %.2f' % (jday['accum'].iloc[-1], ))

    # Min and Max
    jmin = df[['sday', 'accum']].groupby('sday').min()
    jmax = df[['sday', 'accum']].groupby('sday').max()
    ax.fill_between(range(1,
                          len(jday.index) + 1),
                    jmin['accum'],
                    jmax['accum'],
                    zorder=2,
                    color='tan')

    # find max year
    plotted = []
    for year, color in zip([
            df['accum'].idxmax().year,
            df[df['sday'] == '1231']['accum'].idxmin().year, year1, year2,
            year3
    ], ['b', 'brown', 'r', 'g', 'purple']):
        if year is None or year in plotted:
            continue
        plotted.append(year)
        df2 = df[df['year'] == year]
        ax.plot(range(1,
                      len(df2.index) + 1),
                df2['accum'],
                label='%s - %.2f' % (year, df2['accum'].iloc[-1]),
                color=color,
                lw=2)

    ax.set_title(
        ("Year to Date Accumulated Precipitation\n"
         "[%s] %s (%s-%s)") %
        (station, nt.sts[station]['name'],
         nt.sts[station]['archive_begin'].year, datetime.date.today().year))
    ax.set_ylabel("Precipitation [inch]")
    ax.grid(True)
    ax.legend(loc=2)
    ax.set_xlim(1, 366)
    ax.set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 365))
    ax.set_xticklabels(calendar.month_abbr[1:])

    return fig, df
captured = """
select 
a.column::date as "Date"
,b.Column2 as "RefNumber"
,round(a.column::numeric, 2) as "Amount"
from schema.table a
inner join  schema2.table2  b
on a.column1 = b.column2
where a.column = 'keyword'
and (a.datecolumn::date >= date_trunc('month', current_date - interval '1' month)
and a.datecolumn::date < date_trunc('month', current_date))
order by 1 asc, 2 asc
limit 5000
"""
#create variable DF from the SQL results
df = read_sql(captured, conn, coerce_float=True)
#show what's in DF
print(df)

#write sheet LastMonth Captured in the above created excel file and add results stored in DF
df.to_excel(writer, sheet_name=capturedsheet, index=False)
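#note: `conn`, `writer` and `capturedsheet` are assumed to be defined before
#this point but are not shown in this snippet. A minimal sketch of that
#setup (connection parameters and file name are placeholders) could be:
#
#    import pandas as pd
#    import psycopg2
#    from pandas.io.sql import read_sql
#
#    conn = psycopg2.connect(dbname="reports", user="report_user",
#                            host="localhost")
#    capturedsheet = "LastMonth Captured"
#    writer = pd.ExcelWriter("monthly_report.xlsx")
#
#after all sheets are written, writer.save() (writer.close() on newer pandas)
#persists the workbook to disk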

#Sales sheet SQL code
sales = """
select 
date_column::date as "Date"
,column_Ref_number as "RefNumber"
,Column_amount::numeric as "Amount"
from schema.table
where column_ID = 'ID number'
and date_column::date >= date_trunc('month', current_date - interval '1' month)
Example #47
0
import sys

import psycopg2 as pg
import pandas.io.sql as psql

import matplotlib as mpl
mpl.use('Agg')
mpl.rcParams['figure.dpi'] = 150
import matplotlib.pyplot as plt
import numpy as np
import tool.acqconv
from tqdm import tqdm

if len(sys.argv) < 3:
    sys.exit(1)

con = pg.connect("dbname=eusospb_data user=eusospb password=1e20eVjemeuso host=localhost")

dataframe_9ec = psql.read_sql(
    "SELECT " \
    "event_id, global_gtu, gtu_in_packet, packet_id, num_triggered_pixels, source_file_acquisition_full, x_y_active_pixels_num "\
    "FROM spb_processing_event_ver2 " \
    "WHERE source_file_acquisition LIKE 'allpackets-SPBEUSO-ACQUISITION-2017%' AND source_file_acquisition NOT LIKE '%sqz.root' AND " \
    " x_y_active_pixels_num > 2048 ORDER BY x_y_active_pixels_num DESC LIMIT {limit:d} OFFSET {offset:d}".format(limit=int(sys.argv[1]),offset=int(sys.argv[2])),con)

#dataframe_9ec.head(100)

i = 0
background_template_event_ids = []

plot_displayed = False

with open('bg_tmp_analysis_log_limit_{}_offset_{}.txt'.format(sys.argv[1],sys.argv[2]),"w") as logfile:
    i=0
    ax_i = 0
    for idx, row in tqdm(dataframe_9ec.iterrows()):
Example #48
0
def recuperaDocumentos(regionais):

    for sigla_trt in regionais:
        # sigla_trt='07'
        print(
            "----------------------------------------------------------------------------"
        )
        print('Retrieving ALL non-confidential documents for TRT ' +
              sigla_trt)
        porta = '5' + sigla_trt + '2'
        nome_arquivo_documentos_selecionados = 'TRT_' + sigla_trt + '_2G_2010-2019_documentosSelecionados.csv'

        try:
            # -----------------------------------------------------------------------------------------------------------------------
            # Retrieve the documents
            conn = psycopg2.connect(dbname=dbname_2g,
                                    user=userbugfix,
                                    password=senhabugfix,
                                    host=ipbugfix,
                                    port=porta)
            # conn = psycopg2.connect(dbname='pje_2grau_consulta', user=userbugfix, password=senhabugfix, host=ipbugfix, port=porta)
            start_time = time.time()

            sql_count = """select max(id_processo_documento) from tb_processo_documento"""
            total_registros = (psql.read_sql(sql_count, conn))
            total_registros = total_registros['max'][0]
            print(
                'Found ' + str(total_registros) +
                ' documents in total in table tb_processo_documento of TRT '
                + sigla_trt)

            if os.path.isfile(path + nome_arquivo_documentos_selecionados):
                os.remove(path + nome_arquivo_documentos_selecionados)
            chunk_size = 1000
            offset = 1000
            dfs = []
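            # page through the table in fixed id ranges of chunk_size rows,
            # appending each chunk to the CSV as soon as it is fetched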
            while True:
                # for i in range(1,5):
                sql = sql_original + " and doc.id_processo_documento >= %d and doc.id_processo_documento < %d  limit %d " % (
                    offset - chunk_size, offset, chunk_size)
                dfs.append(psql.read_sql(sql, conn))
                if offset == 1000:
                    print('First rows retrieved ...' + sql[-100:])
                    dfs[-1].to_csv(path + nome_arquivo_documentos_selecionados,
                                   mode='a',
                                   header=True,
                                   quoting=csv.QUOTE_ALL)
                    a = dfs[-1]
                else:
                    dfs[-1].to_csv(path + nome_arquivo_documentos_selecionados,
                                   mode='a',
                                   header=False,
                                   quoting=csv.QUOTE_ALL)
                offset += chunk_size
                if offset > total_registros + chunk_size:
                    print('Last SQL executed ...' + sql[-100:])
                    print('Data retrieved successfully.')
                    break
            total_time = time.time() - start_time
            print('\nTime to retrieve data: ' +
                  str(timedelta(seconds=(total_time))))
        except Exception as e:
            print("\033[91mNão foi possível se conectar na base do  TRT " +
                  sigla_trt + "\033[0m")
            print(e)
            continue
Example #49
0
def get_data(ctx):
    """ Get the data"""
    days = ctx['days']
    varname = ctx['var']
    offset = 6 if varname.startswith('coldest') else 0
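    # a six-month shift maps July-June onto a single "season" year, so a
    # winter spanning New Year's is ranked as one season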
    station = ctx['station']
    if ctx['network'].endswith("CLIMATE"):
        table = "alldata_%s" % (station[:2], )
        pgconn = get_dbconn('coop')
        highcol = "high"
        lowcol = "low"
        precipcol = "precip"
        stationcol = "station"
    else:
        station = ctx['_nt'].sts[station]['iemid']
        pgconn = get_dbconn('iem')
        highcol = "max_tmpf"
        lowcol = "min_tmpf"
        precipcol = "pday"
        table = "summary"
        stationcol = "iemid"
    df = read_sql("""
    WITH data as (
    SELECT day, extract(year from day + '%s months'::interval) as season,
    avg((""" + highcol + """ + """ + lowcol + """)/2.)
        OVER (ORDER by day ASC ROWS %s preceding) as avg_temp,
    avg(""" + highcol + """)
        OVER (ORDER by day ASC ROWS %s preceding) as avg_hitemp,
    avg(""" + lowcol + """)
        OVER (ORDER by day ASC ROWS %s preceding) as avg_lotemp,
    sum(""" + precipcol + """)
        OVER (ORDER by day ASC ROWS %s preceding) as sum_precip
    from """+table+""" WHERE """ + stationcol + """ = %s),
    agg1 as (
        SELECT season, day, avg_temp, avg_hitemp, avg_lotemp,
        sum_precip,
        rank() OVER (PARTITION by season ORDER by avg_temp ASC)
            as coldest_temp_rank,
        rank() OVER (PARTITION by season ORDER by avg_hitemp ASC)
            as coldest_hitemp_rank,
        rank() OVER (PARTITION by season ORDER by avg_lotemp ASC)
            as coldest_lotemp_rank,
        rank() OVER (PARTITION by season ORDER by avg_temp DESC)
            as warmest_temp_rank,
        rank() OVER (PARTITION by season ORDER by avg_hitemp DESC)
            as warmest_hitemp_rank,
        rank() OVER (PARTITION by season ORDER by avg_lotemp DESC)
            as warmest_lotemp_rank,
        rank() OVER (PARTITION by season ORDER by sum_precip DESC)
            as wettest_rank,
        count(*) OVER (PARTITION by season)
        from data)
    SELECT season, day,
    extract(doy from day - '%s days'::interval)::int as doy,
    avg_temp, avg_hitemp, avg_lotemp,
    sum_precip from agg1 where """+varname+"""_rank = 1 and count > 240
    """, pgconn, params=(offset, days - 1, days - 1, days - 1, days - 1,
                         station, days - 1),
                  index_col='season')
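    # push Jan-Jun day-of-year values past 365 so cold-season events plot
    # continuously across New Year's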
    if varname.startswith('coldest'):
        df.loc[df['doy'] < 183, 'doy'] += 365.
    return df
Example #50
0
import psycopg2
import sys
from pandas.io.sql import read_sql
pgconn = psycopg2.connect(database='idep', host='iemdb', user='******')

df = read_sql("""
    with yearly as (
        SELECT huc_12, extract(year from valid) as yr,
        sum(avg_loss) from results_by_huc12 where scenario = 0
        GROUP by huc_12, yr),
    agg as (
        SELECT huc_12, avg(sum) from yearly GROUP by huc_12)

    SELECT st_x(st_centroid(st_transform(geom, 4326))),
    st_y(st_centroid(st_transform(geom, 4326))), a.huc_12, avg
    from huc12 h JOIN agg a on (h.huc_12 = a.huc_12) WHERE
    h.states ~* 'IA' ORDER by avg DESC
    """, pgconn, index_col='huc_12')

pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******')
cursor = pgconn.cursor()

DONE = []
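# county names already printed; each county should only be listed once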
for i, row in df.iterrows():
    cursor.execute("""SELECT name from ugcs where end_ts is null and
    state = 'IA' and substr(ugc, 3, 1) = 'C' and
    ST_Contains(geom, St_SetSRID(ST_Point(%s, %s), 4326))
    """, (row['st_x'], row['st_y']))
    name = cursor.fetchone()[0]
    if name not in DONE:
        print(name)
        DONE.append(name)
Example #51
0
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = psycopg2.connect(database='iem', host='iemdb', user='******')
    ctx = get_autoplot_context(fdict, get_description())

    sts = ctx['sts']
    ets = ctx['ets']
    varname = ctx['var']

    df = read_sql("""
    WITH data as (
        SELECT valid, high - high_normal as high_delta,
        low - low_normal as low_delta, precip, snow
        from cli_data where valid >= %s and valid <= %s
        and substr(station, 1, 1) = 'K'
    )
    SELECT valid,
    sum(case when high_delta > 0 then 1 else 0 end) as high_above,
    sum(case when high_delta = 0 then 1 else 0 end) as high_equal,
    sum(case when high_delta < 0 then 1 else 0 end) as high_below,
    sum(case when low_delta > 0 then 1 else 0 end) as low_above,
    sum(case when low_delta = 0 then 1 else 0 end) as low_equal,
    sum(case when low_delta < 0 then 1 else 0 end) as low_below,
    sum(case when precip > 0 then 1 else 0 end) as precip_above,
    sum(case when precip = 0 then 1 else 0 end) as precip_below,
    sum(case when snow > 0 then 1 else 0 end) as snow_above,
    sum(case when snow = 0 then 1 else 0 end) as snow_below
    from data GROUP by valid ORDER by valid ASC
    """,
                  pgconn,
                  params=(sts, ets),
                  index_col='valid')
    if len(df.index) == 0:
        return 'Error, no results returned!'
    for v in ['precip', 'snow']:
        df[v + '_count'] = df[v + '_above'] + df[v + '_below']
        if varname == v:
            xlabel = "<-- No %s %%   |     %s   %% -->" % (v.capitalize(),
                                                           v.capitalize())
            colors = ['r', 'b']
    for v in ['high', 'low']:
        df[v + '_count'] = (df[v + '_above'] + df[v + '_below'] +
                            df[v + '_equal'])
        if varname == v:
            xlabel = "<-- Below Average %    |    Above Average % -->"
            colors = ['b', 'r']

    (fig, ax) = plt.subplots(1, 1)
    ax.barh(df.index.values,
            0 - (df[varname + '_below'] / df[varname + '_count'] * 100.),
            fc=colors[0],
            ec=colors[0],
            align='center')
    ax.barh(df.index.values,
            df[varname + '_above'] / df[varname + '_count'] * 100.,
            fc=colors[1],
            ec=colors[1],
            align='center')
    ax.set_xlim(-100, 100)
    ax.grid(True)
    ax.set_title(
        ("Percentage of CONUS NWS First Order CLImate Sites\n"
         "(%s - %s) %s") % (sts.strftime("%-d %b %Y"),
                            ets.strftime("%-d %b %Y"), MDICT.get(varname)))
    ax.set_xlabel(xlabel)
    ticks = [-100, -90, -75, -50, -25, -10, 0, 10, 25, 50, 75, 90, 100]
    ax.set_xticks(ticks)
    ax.set_xticklabels([abs(x) for x in ticks])
    plt.setp(ax.get_xticklabels(), rotation=30)
    ax.set_position([0.2, 0.15, 0.75, 0.75])

    return fig, df
Example #52
0
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn('postgis')
    ctx = get_autoplot_context(fdict, get_description())
    sts = ctx['sdate']
    sts = sts.replace(tzinfo=pytz.UTC)
    ets = ctx['edate']
    ets = ets.replace(tzinfo=pytz.UTC)
    p1 = ctx['phenomenav1']
    p2 = ctx['phenomenav2']
    p3 = ctx['phenomenav3']
    p4 = ctx['phenomenav4']
    phenomena = []
    for p in [p1, p2, p3, p4]:
        if p is not None:
            phenomena.append(p[:2])
    s1 = ctx['significancev1']
    s2 = ctx['significancev2']
    s3 = ctx['significancev3']
    s4 = ctx['significancev4']
    significance = []
    for s in [s1, s2, s3, s4]:
        if s is not None:
            significance.append(s[0])

    pstr = []
    subtitle = ""
    title = ""
    for p, s in zip(phenomena, significance):
        pstr.append("(phenomena = '%s' and significance = '%s')" % (p, s))
        subtitle += "%s.%s " % (p, s)
        title += vtec.get_ps_string(p, s)
    if len(phenomena) > 1:
        title = "VTEC Unique Event"
    pstr = " or ".join(pstr)
    pstr = "(%s)" % (pstr, )

    df = read_sql("""
with total as (
  select distinct wfo, extract(year from issue at time zone 'UTC') as year,
  phenomena, significance, eventid from warnings
  where """ + pstr + """ and
  issue >= %s and issue < %s
)

SELECT wfo, phenomena, significance, year, count(*) from total
GROUP by wfo, phenomena, significance, year
    """,
                  pgconn,
                  params=(sts, ets))

    df2 = df.groupby('wfo')['count'].sum()

    nt = NetworkTable("WFO")
    for sid in nt.sts:
        sid = sid[-3:]
        if sid not in df2:
            df2[sid] = 0
    maxv = df2.max()
    bins = [0, 1, 2, 3, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100, 200]
    # test the larger threshold first, else the > 1000 branch is unreachable
    if maxv > 1000:
        bins = [
            0, 1, 5, 10, 50, 100, 150, 200, 250, 500, 750, 1000, 1250, 1500,
            2000
        ]
    elif maxv > 200:
        bins = [
            0, 1, 3, 5, 10, 20, 35, 50, 75, 100, 150, 200, 250, 500, 750, 1000
        ]

    mp = MapPlot(sector='nws',
                 axisbg='white',
                 title='%s Counts by NWS Office' % (title, ),
                 subtitle=('Valid %s - %s UTC, based on VTEC: %s') %
                 (sts.strftime("%d %b %Y %H:%M"),
                  ets.strftime("%d %b %Y %H:%M"), subtitle))
    mp.fill_cwas(df2, bins=bins, ilabel=True)

    return mp.fig, df
Example #53
0
def plotter(fdict):
    """ Go """
    pgconn = util.get_dbconn('coop')

    today = datetime.date.today()
    ctx = util.get_autoplot_context(fdict, get_description())
    station = ctx['station']
    threshold = ctx['threshold']
    month1 = ctx['month1']
    varname1 = ctx['var1']
    num1 = min([12, ctx['num1']])
    month2 = ctx['month2']
    varname2 = ctx['var2']
    num2 = min([12, ctx['num2']])
    months1, offsets1 = compute_months_and_offsets(month1, num1)
    months2, offsets2 = compute_months_and_offsets(month2, num2)
    table = "alldata_%s" % (station[:2],)
    nt = network.Table("%sCLIMATE" % (station[:2],))
    # Compute the monthly totals
    df = read_sql("""
    SELECT year, month, avg((high+low)/2.) as avg_temp,
    sum(precip) as total_precip, max(high) as max_high, min(low) as min_low,
    sum(case when high >= %s then 1 else 0 end) as days_high_aoa,
    sum(cdd(high, low, 65)) as cdd65,
    sum(hdd(high, low, 65)) as hdd65,
    sum(gddxx(32, 86, high, low)) as gdd32,
    sum(gddxx(41, 86, high, low)) as gdd41,
    sum(gddxx(46, 86, high, low)) as gdd46,
    sum(gddxx(48, 86, high, low)) as gdd48,
    sum(gddxx(50, 86, high, low)) as gdd50,
    sum(gddxx(51, 86, high, low)) as gdd51,
    sum(gddxx(52, 86, high, low)) as gdd52
    from """+table+"""
    WHERE station = %s and day < %s GROUP by year, month
    """, pgconn, params=(threshold, station, today.replace(day=1)),
                  index_col='year')

    xdf = combine(df, months1, offsets1)
    ydf = combine(df, months2, offsets2)
    if xdf.empty or ydf.empty:
        raise NoDataFound("Sorry, could not find data.")

    resdf = pd.DataFrame({"%s_1" % (varname1, ): xdf[varname1],
                          "%s_2" % (varname2, ): ydf[varname2]})
    resdf.dropna(inplace=True)
    (fig, ax) = plt.subplots(1, 1, figsize=(8, 6))
    ax.scatter(resdf[varname1+"_1"], resdf[varname2+"_2"], marker='s',
               facecolor='b', edgecolor='b', label=None, zorder=3)
    ax.set_title(("%s-%s %s [%s]\n"
                  "Comparison of Monthly Periods, Quadrant Frequency Labelled"
                  ) % (resdf.index.min(), resdf.index.max(),
                       nt.sts[station]['name'], station))
    ax.grid(True)

    h_slope, intercept, r_value, _, _ = stats.linregress(resdf[varname1+"_1"],
                                                         resdf[varname2+"_2"])
    y = h_slope * np.arange(resdf[varname1+"_1"].min(),
                            resdf[varname1+"_1"].max()) + intercept
    ax.plot(np.arange(resdf[varname1+"_1"].min(),
                      resdf[varname1+"_1"].max()), y, lw=2, color='r',
            label="Slope=%.2f R$^2$=%.2f" % (h_slope, r_value ** 2))
    ax.legend(fontsize=10)
    xmonths = ", ".join([calendar.month_abbr[x] for x in months1])
    ymonths = ", ".join([calendar.month_abbr[x] for x in months2])
    t1 = "" if varname1 not in ['days_high_aoa', ] else " %.0f" % (threshold,)
    t2 = "" if varname2 not in ['days_high_aoa', ] else " %.0f" % (threshold,)
    x = resdf["%s_1" % (varname1, )].mean()
    y = resdf["%s_2" % (varname2, )].mean()
    ax.set_xlabel("%s\n%s%s [%s], Avg: %.1f" % (xmonths, PDICT[varname1], t1,
                                                UNITS[varname1], x),
                  fontsize=12)
    ax.set_ylabel("%s\n%s%s [%s], Avg: %.1f" % (ymonths, PDICT[varname2], t2,
                                                UNITS[varname2], y),
                  fontsize=12)

    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.05,
                     box.width, box.height * 0.95])
    ax.axhline(y, linestyle='--', color='g')
    ax.axvline(x, linestyle='--', color='g')
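    # count the years falling in each quadrant relative to the two means and
    # label each corner with the count and percentage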
    ur = len(resdf[(resdf["%s_1" % (varname1, )] >= x) &
                   (resdf["%s_2" % (varname2, )] >= y)].index)
    ax.text(0.95, 0.75, "%s (%.1f%%)" % (ur,
                                         ur / float(len(resdf.index)) * 100.),
            color='tan', fontsize=24, transform=ax.transAxes, ha='right',
            zorder=2)
    lr = len(resdf[(resdf["%s_1" % (varname1, )] >= x) &
                   (resdf["%s_2" % (varname2, )] < y)].index)
    ax.text(0.95, 0.25, "%s (%.1f%%)" % (lr,
                                         lr / float(len(resdf.index)) * 100.),
            color='tan', fontsize=24, transform=ax.transAxes, ha='right',
            zorder=2)
    ll = len(resdf[(resdf["%s_1" % (varname1, )] < x) &
                   (resdf["%s_2" % (varname2, )] < y)].index)
    ax.text(0.05, 0.25, "%s (%.1f%%)" % (ll,
                                         ll / float(len(resdf.index)) * 100.),
            color='tan', fontsize=24, transform=ax.transAxes, ha='left',
            zorder=2)
    ul = len(resdf[(resdf["%s_1" % (varname1, )] < x) &
                   (resdf["%s_2" % (varname2, )] >= y)].index)
    ax.text(0.05, 0.75, "%s (%.1f%%)" % (ul,
                                         ul / float(len(resdf.index)) * 100.),
            color='tan', fontsize=24, transform=ax.transAxes, ha='left',
            zorder=2)
    return fig, resdf
Example #54
0
    def Plot(self):
        """Plots image and saves image"""
        try:
            os.remove('/tmp/plot.png')
        except:
            pass
        if self.stype == 'plot':
            try:
                conn = connectDB()
                cursor = conn.cursor()
                cursor.execute(self.sql)
                data = pd.DataFrame(
                    cursor.fetchall(),
                    columns=['gb', 'date_trunc', 'workflow', 'type'])

            except TypeError as e:
                LOGGER.error(str(e))

                return {'error': str(e)}

            if data.empty is not True:
                data.sort_values(["date_trunc", "workflow"],
                                 ascending=[False, True])
                data.date_trunc = data.date_trunc.dt.date
                data['gb'] = data['gb'].astype(float)
                gdata = data.groupby(['date_trunc', 'type',
                                      'workflow'])['gb'].sum()
                subsets = gdata.unstack(level=0).\
                    unstack(level=1).unstack(level=2)
                cleansubset = subsets.dropna()
                x_offset = -0.01
                y_offset = -0.06

                height = len(gdata) * 3
                ax = cleansubset.plot(legend=False,
                                      kind='barh',
                                      stacked=True,
                                      subplots=False,
                                      figsize=(32, height),
                                      width=0.89,
                                      sharey=False)
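                # annotate each stacked bar segment with its width in GB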
                for p in ax.patches:
                    b = p.get_bbox()
                    val = "{:.0f}".format(b.x1 - b.x0)
                    ax.annotate(val, ((b.x0 + b.x1) / 2 + x_offset,
                                      (b.y1) / (1) + y_offset),
                                verticalalignment='top',
                                horizontalalignment='left')
                fig = ax.get_figure()
                fig.savefig('/tmp/plot.png')
                LOGGER.info('saved image')
                plot.close(fig)
            else:
                LOGGER.info('no results, empty DataFrame')
                return {'error': 'emptydata, no results'}
        if self.stype == 'workflowplot':
            try:
                data = None
                conn = connectDB()
                cursor = conn.cursor()
                cursor.execute(self.sql)
                data = pd.DataFrame(
                    cursor.fetchall(),
                    columns=['gb', 'date_trunc', 'workflow', 'type'])

            except TypeError as e:
                LOGGER.error(str(e))
                return {'error': str(e)}

            if data.empty is not True:
                height = 8
                width = 16
                mean = 5
                if self.days >= 30:
                    width = self.days
                    height = 10
                    mean = 8
                if self.days >= 100:
                    width = 72
                    height = 18
                    mean = 20
                if self.days >= 250:
                    width = 160
                    height = 24
                    mean = 60

                data.sort_values(["date_trunc", "workflow"],
                                 ascending=[False, True])
                data.date_trunc = data.date_trunc.dt.date
                data['gb'] = data['gb'].astype(float)
                gdata = data.groupby(['date_trunc', 'workflow'])['gb'].sum()
                # rolling mean averaged over 'mean' days
                gdata_mean = gdata.rolling(mean).mean().fillna(value=0)
                d = gdata.unstack(level=-1).fillna(value=0)
                d2 = gdata_mean.unstack(level=-1)
                plot.style.use('ggplot')
                fig = plot.figure(figsize=(width, height))
                ax = fig.add_subplot()
                ax2 = ax.twiny()
                ax2.set_title('Ingest workflow')
                ax2.set_ylabel("GB", loc='center')
                d2.plot(legend=True,
                        kind='area',
                        ax=ax,
                        subplots=False,
                        stacked=True,
                        figsize=(width, height),
                        colormap='summer')

                ax.legend(loc='upper left')
                d.plot(legend=True,
                       kind='line',
                       ax=ax2,
                       subplots=False,
                       linewidth=5.0,
                       stacked=True,
                       sharey=True,
                       figsize=(width, height))

                fig.get_figure()
                #plot.show()
                fig.savefig('/tmp/plot.png')
                LOGGER.info('saved image')
                plot.close(fig)
            else:
                LOGGER.error('no results, empty DataFrame')
                return {'error': 'emptydata, no results'}
        if self.stype == 'cpplot':

            conn = connectDB()
            df = None
            df = psql.read_sql(self.sqlcp, conn)
            LOGGER.info('ran dataFrame SQL read')
            if df.empty is not True:

                o = df.groupby(['date_trunc', 'organisation'],
                               sort=False)['gb'].sum()
                u = o.unstack(level=-1)
                n = len(u.columns)
                height = int(n) * 6
                ax = u.plot(figsize=(32, height),
                            kind='area',
                            subplots=True,
                            stacked=False,
                            colormap='Accent')

                #plot.show()
                plot.savefig('/tmp/plot.png')
                LOGGER.info('saved image')
                #plot.close(plot)
            else:
                LOGGER.error('no results, empty DataFrame')
                return {'error': 'emptydata, no results'}
        try:
            conn.close()
        except Exception as e:
            LOGGER.error(str(e))

            pass
Example #55
0
def do(date):
    """Do the necessary work for this date"""
    pgconn = psycopg2.connect(database='hads',
                              host='iemdb-hads',
                              user='******')
    iem_pgconn = psycopg2.connect(database='iem', host='iemdb')
    icursor = iem_pgconn.cursor()
    # load up the current obs
    df = read_sql("""
    WITH dcp as (
        SELECT id, iemid, tzname from stations where network ~* 'DCP'
    ), obs as (
        SELECT iemid, pday from summary_""" + str(date.year) + """
        WHERE day = %s)
    SELECT d.id, d.iemid, d.tzname, o.pday from
    dcp d LEFT JOIN obs o on (d.iemid = o.iemid)
    """,
                  iem_pgconn,
                  params=(date, ),
                  index_col='id')
    bases = {}
    for tzname in df['tzname'].unique():
        ts = datetime.datetime(date.year, date.month, date.day, 12)
        ts = ts.replace(tzinfo=pytz.timezone("UTC"))
        base = ts.astimezone(pytz.timezone(tzname))
        bases[tzname] = base.replace(hour=0)
    # retrieve data that is within 12 hours of our bounds
    sts = datetime.datetime(date.year, date.month,
                            date.day) - datetime.timedelta(hours=12)
    ets = sts + datetime.timedelta(hours=48)
    obsdf = read_sql("""
    SELECT distinct station, valid at time zone 'UTC' as utc_valid, value
    from raw""" + str(date.year) + """ WHERE valid between %s and %s and
    substr(key, 1, 3) = 'PPH' and value >= 0
    """,
                     pgconn,
                     params=(sts, ets),
                     index_col=None)
    obsdf['utc_valid'] = obsdf['utc_valid'].dt.tz_localize('utc')
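    # minute-by-minute accumulator covering the 24 hour local day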
    precip = np.zeros((24 * 60))
    grouped = obsdf.groupby('station')
    for station in obsdf['station'].unique():
        if station not in df.index:
            continue
        precip[:] = 0
        tz = df.loc[station, 'tzname']
        current_pday = df.loc[station, 'pday']
        for _, row in grouped.get_group(station).iterrows():
            ts = row['utc_valid'].to_pydatetime()
            if ts <= bases[tz]:
                continue
            t1 = int((ts - bases[tz]).total_seconds() / 60.)
            t0 = max([0, t1 - 60])
            # spread the hourly (PPH) value evenly over the preceding hour
            precip[t0:t1] = row['value'] / 60.
        pday = np.sum(precip)
        if pday > 50 or np.allclose([pday], [current_pday]):
            # print("Skipping %s %s==%s" % (station, current_pday,
            #                              pday))
            continue
        # print("Updating %s old: %s new: %s" % (station, current_pday, pday))
        iemid = df.loc[station, 'iemid']
        icursor.execute(
            """UPDATE summary_""" + str(date.year) + """
        SET pday = %s WHERE iemid = %s and day = %s
        """, (pday, iemid, date))
        if icursor.rowcount == 0:
            print("Adding record %s[%s] for day %s" % (station, iemid, date))
            icursor.execute(
                """INSERT into summary_""" + str(date.year) + """
            (iemid, day) VALUES (%s, %s)
            """, (iemid, date))
            icursor.execute(
                """UPDATE summary_""" + str(date.year) + """
            SET pday = %s WHERE iemid = %s and day = %s
            and %s > coalesce(pday, 0)
            """, (pday, iemid, date, pday))
    icursor.close()
    iem_pgconn.commit()
Example #56
0
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn("coop")
    ctx = get_autoplot_context(fdict, get_description())
    station = ctx["station"]
    ab = ctx["_nt"].sts[station]["archive_begin"]
    if ab is None:
        raise NoDataFound("Unknown station metadatab.")
    syear = max([ctx["syear"], ab.year])
    eyear = ctx["eyear"]
    sts = datetime.date(syear, 11, 1)
    ets = datetime.date(eyear + 1, 6, 1)

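    # the first two characters of the station id select the per-state table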
    table = "alldata_%s" % (station[:2], )
    eyear = datetime.datetime.now().year
    obs = np.ma.ones((eyear - syear + 1, 183), "f") * -1

    df = read_sql(
        """
        SELECT year, extract(doy from day) as doy, snowd, day,
        case when month < 6 then year - 1 else year end as winter_year
        from """ + table + """
        WHERE station = %s and
        month in (11, 12, 1, 2, 3, 4) and snowd >= 0 and day between %s and %s
    """,
        pgconn,
        params=(station, sts, ets),
        index_col="day",
    )
    if df.empty:
        raise NoDataFound("No Data Found.")
    minyear = df["year"].min()
    maxyear = df["year"].max()
    for _, row in df.iterrows():
        doy = row["doy"] if row["doy"] < 180 else (row["doy"] - 365)
        obs[int(row["winter_year"]) - syear, int(doy) + 61] = row["snowd"]

    obs.mask = np.where(obs < 0, True, False)
    # obs[obs == 0] = -1

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_axes([0.1, 0.1, 0.93, 0.8])
    ax.set_xticks((0, 29, 60, 91, 120, 151, 181))
    ax.set_xticklabels(
        ["Nov 1", "Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1", "May 1"])
    ax.set_ylabel("Year of Nov,Dec of Season Labeled")
    ax.set_xlabel("Date of Winter Season")
    ax.set_title(("[%s] %s\nDaily Snow Depth (%s-%s) [inches]") %
                 (station, ctx["_nt"].sts[station]["name"], minyear, eyear))

    cmap = copy.copy(nwssnow())
    norm = mpcolors.BoundaryNorm(LEVELS, cmap.N)
    cmap.set_bad("#EEEEEE")
    cmap.set_under("white")
    res = ax.imshow(
        obs,
        aspect="auto",
        rasterized=True,
        norm=norm,
        interpolation="nearest",
        cmap=cmap,
        extent=[0, 182, eyear + 1 - 0.5, syear - 0.5],
    )
    fig.colorbar(res, spacing="proportional", ticks=LEVELS, extend="max")
    ax.grid(True)
    ax.set_ylim(maxyear + 0.5, minyear - 0.5)

    return fig, df
Example #57
0
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = get_dbconn('coop')
    ctx = get_autoplot_context(fdict, get_description())
    st1 = ctx['st1'][:2]
    st2 = ctx['st2'][:2]
    st3 = ctx['st3'][:2]
    st4 = ctx['st4'][:2]
    st5 = ctx['st5'][:2]
    st6 = ctx['st6'][:2]
    y1 = ctx['y1']
    y2 = ctx.get('y2')
    y3 = ctx.get('y3')
    y4 = ctx.get('y4')
    years = [y1, y2, y3, y4]
    states = [st1, st2, st3, st4, st5, st6]
    commodity_desc = ctx['commodity_desc']
    df = read_sql("""
    select week_ending, state_alpha, num_value, unit_desc, year,
    extract(doy from week_ending) as doy from
    nass_quickstats
    where commodity_desc = %s  and statisticcat_desc = 'CONDITION'
    and state_alpha in %s ORDER by week_ending ASC
    """,
                  pgconn,
                  params=(
                      commodity_desc,
                      tuple(states),
                  ),
                  index_col=None)
    if df.empty:
        return "ERROR: No data found!"

    prop = matplotlib.font_manager.FontProperties(size=10)

    fig, ax = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 6))

    i = 0
    for row in range(3):
        for col in range(2):
            state = states[i]
            df2 = df[df['state_alpha'] == state]
            _years = df2['year'].unique()
            colors = ['black', 'green', 'blue', 'red']
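            # user-selected years get a color popped from the list above;
            # every other year is drawn in tan as background context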

            for year in _years:
                s = df2[(df2['year'] == year)
                        & ((df2['unit_desc'] == 'PCT POOR')
                           | (df2['unit_desc'] == 'PCT VERY POOR'))]
                s2 = s[['doy', 'num_value']].groupby(by=['doy']).sum()
                if year in years:
                    ax[row, col].plot(s2.index.values,
                                      s2['num_value'],
                                      c=colors.pop(),
                                      lw=3,
                                      zorder=5,
                                      label='%s' % (year, ))
                else:
                    ax[row, col].plot(s2.index.values,
                                      s2['num_value'].values,
                                      c='tan')
            if row == 0 and col == 0:
                ax[row, col].legend(ncol=5, loc=(0.4, -0.19), prop=prop)
            ax[row, col].set_xticks(
                (121, 152, 182, 213, 244, 274, 305, 335, 365))
            ax[row, col].set_xticklabels(calendar.month_abbr[5:])
            ax[row, col].set_xlim(120, 310)
            ax[row, col].grid(True)
            ax[row, col].set_ylim(0, 100)
            if col == 0:
                ax[row, col].set_ylabel("Coverage [%]")
            ax[row, col].text(1.,
                              0.5,
                              "%s" % (state_names[state], ),
                              ha='left',
                              va='center',
                              rotation=-90,
                              size=16,
                              transform=ax[row, col].transAxes)
            ax[row, col].text(0.02,
                              0.97,
                              state,
                              ha='left',
                              va='top',
                              size=14,
                              transform=ax[row, col].transAxes)
            i += 1

    fig.text(0.5,
             .91,
             ("USDA Weekly %s Crop Condition Report (%.0f-%.0f)\n"
              "Areal %% of State in Poor & Very Poor Condition "
              "(thru %s)") % (PDICT2[commodity_desc], df['year'].min(),
                              df['year'].max(), df['week_ending'].max()),
             ha='center')

    return fig, df
Example #58
0
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = psycopg2.connect(database='isuag', host='iemdb', user='******')

    threshold = int(fdict.get('threshold', 50))
    threshold_c = temperature(threshold, 'F').value('C')
    hours1 = int(fdict.get('hours1', 48))
    hours2 = int(fdict.get('hours2', 24))
    station = fdict.get('station', 'BOOI4')
    oldstation = XREF[station]
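    # the CTE chain flags hourly soil temp crossings of the threshold,
    # keeps the first warm-up each year that held for hours1+ hours (agg4),
    # and pairs it with later drops back below the threshold (agg5)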

    df = read_sql("""
    with obs as (
        select valid, c300, lag(c300) OVER (ORDER by valid ASC) from hourly
        where station = %s),
    agg1 as (
        select valid,
        case when c300 > %s and lag < %s then 1
             when c300 < %s and lag > %s then -1
             else 0 end as t from obs),
    agg2 as (
        SELECT valid, t from agg1 where t != 0),
    agg3 as (
        select valid, lead(valid) OVER (ORDER by valid ASC),
        t from agg2),
    agg4 as (
        select extract(year from valid) as yr, valid, lead,
        rank() OVER (PARTITION by extract(year from valid) ORDER by valid ASC)
        from agg3 where t = 1
        and (lead - valid) >= '%s hours'::interval),
    agg5 as (
        select extract(year from valid) as yr, valid, lead
        from agg3 where t = -1)

    select f.yr, f.valid as fup, f.lead as flead, d.valid as dup,
    d.lead as dlead from agg4 f JOIN agg5 d ON (f.yr = d.yr)
    where f.rank = 1 and d.valid > f.valid
    ORDER by fup ASC
    """,
                  pgconn,
                  params=(oldstation, threshold, threshold, threshold,
                          threshold, hours1),
                  index_col=None)

    df2 = read_sql("""
    with obs as (
        select valid, tsoil_c_avg,
        lag(tsoil_c_avg) OVER (ORDER by valid ASC) from sm_hourly
        where station = %s),
    agg1 as (
        select valid,
        case when tsoil_c_avg > %s and lag < %s then 1
             when tsoil_c_avg < %s and lag > %s then -1
             else 0 end as t from obs),
    agg2 as (
        SELECT valid, t from agg1 where t != 0),
    agg3 as (
        select valid, lead(valid) OVER (ORDER by valid ASC),
        t from agg2),
    agg4 as (
        select extract(year from valid) as yr, valid, lead,
        rank() OVER (PARTITION by extract(year from valid) ORDER by valid ASC)
        from agg3 where t = 1
        and (lead - valid) >= '%s hours'::interval),
    agg5 as (
        select extract(year from valid) as yr, valid, lead
        from agg3 where t = -1)

    select f.yr, f.valid as fup, f.lead as flead, d.valid as dup,
    d.lead as dlead from agg4 f JOIN agg5 d ON (f.yr = d.yr)
    where f.rank = 1 and d.valid > f.valid
    ORDER by fup ASC
    """,
                   pgconn,
                   params=(station, threshold_c, threshold_c, threshold_c,
                           threshold_c, hours1),
                   index_col=None)

    (fig, ax) = plt.subplots(1, 1)

    d2000 = datetime.datetime(2000, 1, 1, 6)
    d2000 = d2000.replace(tzinfo=pytz.timezone("UTC"))
    for d in [df, df2]:
        for _, row in d.iterrows():
            if row['dlead'] is None:
                continue
            f0 = (row['fup'].replace(year=2000) - d2000).total_seconds()
            f1 = (row['flead'].replace(year=2000) - d2000).total_seconds()
            d0 = (row['dup'].replace(year=2000) - d2000).total_seconds()
            d1 = (row['dlead'].replace(year=2000) - d2000).total_seconds()
            if d1 < d0:
                continue
            ax.barh(row['fup'].year, (f1 - f0),
                    left=f0,
                    facecolor='r',
                    align='center',
                    edgecolor='r')
            c = 'lightblue' if (d1 - d0) < (hours2 * 3600) else 'b'
            ax.barh(row['fup'].year, (d1 - d0),
                    left=d0,
                    facecolor=c,
                    align='center',
                    edgecolor=c)

    xticks = []
    xticklabels = []
    for i in range(1, 13):
        d2 = d2000.replace(month=i)
        xticks.append((d2 - d2000).total_seconds())
        xticklabels.append(d2.strftime("%-d %b"))
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    ax.set_xlim(xticks[2], xticks[6])
    ax.grid(True)

    nt = NetworkTable("ISUSM")
    nt2 = NetworkTable("ISUAG")
    ax.set_title(
        ("[%s] %s 4 Inch Soil Temps\n[%s] %s used for pre-%s dates") %
        (station, nt.sts[station]['name'], oldstation,
         nt2.sts[oldstation]['name'], nt.sts[station]['archive_begin'].year))
    ax.set_ylim(df['yr'].min() - 1, df2['yr'].max() + 1)

    p0 = plt.Rectangle((0, 0), 1, 1, fc="r")
    p1 = plt.Rectangle((0, 0), 1, 1, fc="lightblue")
    p2 = plt.Rectangle((0, 0), 1, 1, fc="b")
    ax.legend((p0, p1, p2),
              ('First Period Above %s for %s+ Hours' %
               (threshold, hours1), 'Below %s for 1+ Hours' %
               (threshold, ), 'Below %s for %s+ Hours' % (threshold, hours2)),
              ncol=2,
              fontsize=11,
              loc=(0., -0.2))
    box = ax.get_position()
    ax.set_position(
        [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])

    return fig, df
Example #59
0
def plotter(fdict):
    """ Go """
    font0 = FontProperties()
    font0.set_family('monospace')
    font0.set_size(16)
    font1 = FontProperties()
    font1.set_size(16)

    pgconn = get_dbconn('asos')
    ctx = get_autoplot_context(fdict, get_description())
    varname = ctx['var']
    varname2 = varname.split("_")[1]
    if varname2 in ['dwpf', 'tmpf']:
        varname2 = "i" + varname2
    month = ctx['month']
    network = ctx['network']
    station = ctx['zstation']
    nt = NetworkTable(network)

    if month == 'all':
        months = range(1, 13)
    elif month == 'fall':
        months = [9, 10, 11]
    elif month == 'winter':
        months = [12, 1, 2]
    elif month == 'spring':
        months = [3, 4, 5]
    elif month == 'summer':
        months = [6, 7, 8]
    elif month == 'gs':
        months = [5, 6, 7, 8, 9]
    else:
        ts = datetime.datetime.strptime("2000-" + month + "-01", '%Y-%b-%d')
        # append a sentinel month so the list has length two and the tuple
        # renders as valid SQL in the "in %s" clause below
        months = [ts.month, 999]

    df = read_sql("""
    WITH obs as (
        SELECT (valid + '10 minutes'::interval) at time zone %s as ts,
        tmpf::int as itmpf, dwpf::int as idwpf, mslp, alti from alldata
        where station = %s and
        extract(month from valid at time zone %s) in %s),
    agg1 as (
        SELECT extract(hour from ts) as hr,
        max(idwpf) as max_dwpf,
        max(itmpf) as max_tmpf,
        min(idwpf) as min_dwpf,
        min(itmpf) as min_tmpf,
        max(alti) as max_alti,
        min(alti) as min_alti,
        max(mslp) as max_mslp,
        min(mslp) as min_mslp
        from obs GROUP by hr)
    SELECT o.ts, a.hr::int as hr,
        a.""" + varname + """ from agg1 a JOIN obs o on
        (a.hr = extract(hour from o.ts)
        and a.""" + varname + """ = o.""" + varname2 + """)
        ORDER by a.hr ASC, o.ts DESC
    """,
                  pgconn,
                  params=(nt.sts[station]['tzname'], station,
                          nt.sts[station]['tzname'], tuple(months)),
                  index_col=None)

    y0 = 0.1
    yheight = 0.8
    dy = (yheight / 24.)
    (fig, ax) = plt.subplots(1, 1, figsize=(8, 8))
    ax.set_position([0.12, y0, 0.57, yheight])
    ax.barh(df['hr'], df[varname], align='center')
    ax.set_ylim(-0.5, 23.5)
    ax.set_yticks([0, 4, 8, 12, 16, 20])
    ax.set_yticklabels(['Mid', '4 AM', '8 AM', 'Noon', '4 PM', '8 PM'])
    ax.grid(True)
    ax.set_xlim([df[varname].min() - 5, df[varname].max() + 5])
    ax.set_ylabel("Local Time %s" % (nt.sts[station]['tzname'], ),
                  fontproperties=font1)

    fig.text(0.5,
             0.93, ("%s [%s] %s-%s\n"
                    "%s [%s]") %
             (nt.sts[station]['name'],
              station, nt.sts[station]['archive_begin'].year,
              datetime.date.today().year, PDICT[varname], MDICT[month]),
             ha='center',
             fontproperties=font1)
    ypos = y0 + (dy / 2.)
    for hr in range(24):
        sdf = df[df['hr'] == hr]
        if sdf.empty:
            continue
        row = sdf.iloc[0]
        fig.text(0.7,
                 ypos,
                 "%3.0f: %s%s" % (row[varname], row['ts'].strftime("%d %b %Y"),
                                  ("*" if len(sdf.index) > 1 else '')),
                 fontproperties=font0,
                 va='center')
        ypos += dy
    ax.set_xlabel("%s %s, * denotes ties" % (PDICT[varname], UNITS[varname]),
                  fontproperties=font1)

    return plt.gcf(), df
Example #60
0
File: p56.py Project: trentford/iem
def plotter(fdict):
    """ Go """
    pgconn = get_dbconn('postgis')
    ctx = get_autoplot_context(fdict, get_description())

    opt = ctx['opt']
    state = ctx['state'][:2]
    phenomena = ctx['phenomena']
    significance = ctx['significance']
    station = ctx['station'][:4]

    nt = NetworkTable('WFO')

    sts = datetime.datetime(2012, 1, 1)
    xticks = []
    for i in range(1, 13):
        ts = sts.replace(month=i)
        xticks.append(int(ts.strftime("%j")))

    (fig, ax) = plt.subplots(2, 1, sharex=True)

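    # NB: the limiter is interpolated straight into the SQL text below,
    # not passed as a bound parameter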
    limiter = " wfo = '%s' " % (station, )
    title = "[%s] NWS %s" % (station, nt.sts[station]['name'])
    if opt == 'state':
        title = "State of %s" % (reference.state_names[state], )
        limiter = " substr(ugc, 1, 2) = '%s' " % (state, )
    df = read_sql("""
    with obs as (
        SELECT distinct extract(year from issue) as yr,
        extract(week from issue) as week, wfo, eventid from warnings WHERE
        """ + limiter + """ and phenomena = %s and significance = %s
    )
    SELECT yr, week, count(*) from obs GROUP by yr, week ORDER by yr ASC
    """,
                  pgconn,
                  params=(phenomena, significance),
                  index_col=None)

    if df.empty:
        raise ValueError("ERROR: No Results Found!")

    # Top Panel: count
    gdf = df.groupby('week').count()
    ax[0].bar((gdf.index.values - 1) * 7, gdf['yr'], width=7)
    ax[0].set_title(("%s\n%s (%s.%s) Events - %i to %i") %
                    (title, vtec.get_ps_string(phenomena, significance),
                     phenomena, significance, df['yr'].min(), df['yr'].max()))
    ax[0].grid()
    ax[0].set_ylabel("Years with 1+ Event")

    # Bottom Panel: events
    gdf = df.groupby('week').sum()
    ax[1].bar((gdf.index.values - 1) * 7, gdf['count'], width=7)
    ax[1].set_ylabel("Total Event Count")
    ax[1].grid()
    ax[1].set_xlabel("Partitioned by Week of the Year")
    ax[1].set_xticks(xticks)
    ax[1].set_xticklabels(calendar.month_abbr[1:])
    ax[1].set_xlim(0, 366)

    return fig, df