def _check_roundtrip(self, frame):
    _skip_if_no_MySQLdb()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(list(range(len(frame2)))) + 10
    drop_sql = "DROP TABLE IF EXISTS test_table2"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(list(range(len(frame2)))) + 10
    tm.assert_frame_equal(expected, result)
def export_to_urbancanvas(building_df, current_year, urbancanvas_scenario_id):
    import pandas.io.sql as sql
    import psycopg2
    import cStringIO

    conn_string = "host='paris.urbansim.org' dbname='denver' user='******' password='******' port=5433"
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()

    if urbancanvas_scenario_id == 0:
        query = "select nextval('developmentproject_id_seq');"
        nextval = sql.read_frame(query, conn)
        nextval = nextval.values[0][0]
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query, conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario(id, name) VALUES(%s, 'Run #%s');" % (nextval, nextval)
        cur.execute(query)
        conn.commit()
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, 1);" % (id, nextval)
        cur.execute(query)
        conn.commit()
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query, conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, %s);" % (id, nextval, nextval)
        cur.execute(query)
        conn.commit()
    else:
        nextval = urbancanvas_scenario_id

    nextval_string = '{' + str(nextval) + '}'
    building_df['projects'] = nextval_string
    valid_from = '{' + str(current_year) + '-1-1}'
    building_df['valid_from'] = valid_from
    building_df['land_area'] = 0
    building_df['tax_exempt'] = 0
    building_df['srcparc_id'] = '0'
    building_df['building_id'] = building_df.index.values
    #building_df['stories'] = 30  ###For testing!
    del building_df['unit_price_residential']
    del building_df['unit_price_non_residential']
    del building_df['building_sqft_per_job']
    del building_df['base_year_jobs']
    del building_df['non_residential_units']
    del building_df['all_units']

    print 'Exporting %s buildings to Urbancanvas database for project %s and year %s.' % (
        building_df.index.size, nextval, current_year)
    output = cStringIO.StringIO()
    building_df.to_csv(output, sep='\t', header=False, index=False)
    output.seek(0)
    cur.copy_from(output, 'building',
                  columns=tuple(building_df.columns.values.tolist()))
    conn.commit()
    return nextval
def request_data_from_db(nations, dt_from, dt_to):
    global df_bbData  # combined Bloomberg and ECOS data

    con = lite.connect('../data/nowcasting.db')
    df_idxData = sql.read_frame('select * from idx_data', con=con)
    df_idxIndex = sql.read_frame('select * from idx_desc', con=con)
    df_gdp = sql.read_frame('select * from idx_gdp', con=con)
    con.close()

    df_idxData.index = pd.to_datetime(df_idxData[df_idxData.columns[0]])
    df_idxData = df_idxData[df_idxData.columns[1:]]
    df_gdp.index = pd.to_datetime(df_gdp['date'])
    df_gdp = df_gdp[df_gdp.columns[:-1]]

    lst_degIdx = df_idxIndex[df_idxIndex['rgn2'].isin(nations)]['num']
    df_idxData = df_idxData[df_idxData.columns[df_idxData.columns.isin(lst_degIdx)]]
    # Drop I529 and I530: they have too few observations to be useful.
    df_idxData = df_idxData.drop(['I529', 'I530'], 1)
    df_bbData = df_idxData

    #df_nation = extract_national_df(lst_nation)
    df_quarter, df_month, df_week, df_daily = agg_mmQqWw2(dt_from, dt_to)

    #df_gdp = extract_gdp_excel('../data/Ecos_gdp.xlsx','Sheet1')
    # Trim GDP to the range covered by df_quarter; the first entry in
    # `nations` is the country itself (the second is the global index).
    df_gdp = df_gdp.ix[df_quarter.index]
    df_gdp = df_gdp[nations[0].encode('utf-8')]
    df_quarter['gdp'] = df_gdp

    return df_quarter, df_month, df_week
def _check_roundtrip(self, frame):
    _skip_if_no_MySQLdb()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unknown table.*")
        cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table", self.db)

    # HACK! Change this once indexes are handled properly.
    result.index = frame.index
    result.index.name = frame.index.name

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    index = Index(lrange(len(frame2))) + 10
    frame2['Idx'] = index
    drop_sql = "DROP TABLE IF EXISTS test_table2"
    cur = self.db.cursor()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unknown table.*")
        cur.execute(drop_sql)
    sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()

    # HACK! Change this once indexes are handled properly.
    expected.index = index
    expected.index.names = result.index.names

    tm.assert_frame_equal(expected, result)
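# write_frame / read_frame are the legacy pandas.io.sql helpers used
# throughout these snippets; current pandas spells the same round trip as
# DataFrame.to_sql / pandas.read_sql. A minimal sketch of that equivalent,
# assuming an in-memory SQLite database rather than the MySQL connection the
# test above uses:
import sqlite3

import pandas as pd


def modern_roundtrip_sketch(frame):
    con = sqlite3.connect(':memory:')
    # Legacy: sql.write_frame(frame, name='test_table', con=con, flavor='mysql')
    frame.to_sql('test_table', con, index=False)
    # Legacy: result = sql.read_frame("select * from test_table", con)
    result = pd.read_sql('select * from test_table', con)
    con.close()
    return result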
def get_development_projects():
    conn_string = orca.get_injectable('conn_string')
    if len(conn_string) == 0:
        print 'A "conn_string" injectable must be registered and populated. Skipping export-to-Urban-Canvas.'
        return None

    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()

    print "Loading committed development projects table"
    devproj_query = "select id, placetype_id as building_type_id, duration, buildings_number, average_floors as stories, sqft as non_residential_sqft, sqft_unit as sqft_per_unit, units as residential_units, Name as name, start_date from developmentprojects where committed = 'TRUE';"
    devproj = sql.read_frame(devproj_query, conn)
    devproj['year_built'] = devproj.start_date.astype('object').astype('str')
    devproj.year_built = devproj.year_built.str.slice(start=0, stop=4)
    devproj.year_built = devproj.year_built.astype('int')

    print "Loading development project parcels"
    dp_pcl_query = "select developmentprojects_parcels.development_project, developmentprojects_parcels.parcel_id, parcel.parcel_acres from developmentprojects_parcels, parcel where developmentprojects_parcels.parcel_id = parcel.parcel_id;"
    dp_pcl = sql.read_frame(dp_pcl_query, conn)

    # In future, use the parcel_acres field on this table too.
    devproject_parcel_ids = dp_pcl.groupby('development_project').parcel_id.max().reset_index()

    scheduled_development_events = pd.merge(devproject_parcel_ids, devproj,
                                            left_on='development_project', right_on='id')
    scheduled_development_events = scheduled_development_events.rename(
        columns={'development_project': 'scheduled_development_event_id',
                 'building_type_id': 'development_type_id'})
    scheduled_development_events = scheduled_development_events[
        ['scheduled_development_event_id', 'year_built', 'development_type_id',
         'stories', 'non_residential_sqft', 'sqft_per_unit',
         'residential_units', 'parcel_id']]
    for col in scheduled_development_events:
        scheduled_development_events[col] = scheduled_development_events[col].astype('int')
    return scheduled_development_events
def ReadPatternsFromDB(codearg, typearg, namearg, mode):
    code = codearg  # e.g. '097950', '005930', '005380', '009540', '036570'
    if typearg == 1:
        symbol = 'GOOG/KRX_' + code
    elif typearg == 2:
        symbol = 'GOOG/KOSDAQ_' + code
    elif typearg == 3:
        symbol = 'GOOG/INDEXKRX_KOSPI200'
    # symbol = 'GOOG/INDEXKRX_KOSPI200'

    startdate = '2014-01-01'
    # enddate = '2008-12-30'
    print symbol

    ''' pattern read '''
    dbname = 'pattern_db_' + codearg + '_' + namearg + '.sqlite'
    con = sqlite3.connect("../data/pattern/" + dbname)
    query = "SELECT * FROM sqlite_master WHERE type='table'"
    df = pd.io.sql.read_frame(query, con)
    tablelen = len(df)
    print 'tablelen:', tablelen

    tablename_base = 'result_' + codearg + '_' + namearg
    readlist = []
    for cnt in range(tablelen):
        tablename = tablename_base + '_' + str(cnt)
        # print 'readtable:', tablename
        patterndf = pd_sql.read_frame("SELECT * from " + tablename, con)
        readlist.append(PatternData(patterndf))
        readlist[cnt].patterndf.index = readlist[cnt].patterndf['Date']
        readlist[cnt].patterndf = readlist[cnt].patterndf.drop('Date', 1)

    # print 'read pattern:', readlist[0].patterndf
    # print 'org patternAr:', patternAr_org[0].patterndf
    # con.close()

    dbname = 'extractid_db_' + codearg + '_' + namearg + '.sqlite'
    con2 = sqlite3.connect("../data/pattern/" + dbname)
    tablename = 'result_' + codearg + '_' + namearg
    extractdf = pd_sql.read_frame("SELECT * from " + tablename, con2)
    extractids = extractdf['ExtractId'].values
    # print 'read pattern:'
    # print readlist[0].patterndf
    print 'extractids:', extractids, len(extractids)

    con.close()
    con2.close()
    print 'extractid save done'
    return readlist, extractids
def update_apsim_output_table(masterDbConn, runPath, update):
    '''
    Updates the apsimOutput table in the master run database. If a run is
    already there it is updated, otherwise it is added.

    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a
        particular run
    update : bool
        if the database needs to be updated or if it is the first commit for
        a particular run

    Returns
    -------
    Nothing.
    '''
    # get the runId
    runId = int(os.path.split(runPath)[1])

    # don't do anything if the database is being updated
    if update == True:
        print "*** Warning: Run {} data may already exist. Skipping write.".format(runId)
        return

    # get sow start from parameters table
    sql = "SELECT sow_start FROM runParameters WHERE run_id = {}".format(runId)
    sowStart = psql.read_frame(sql, masterDbConn).ix[0][0]

    # check to see if sow date is auto (determined from lookup table)
    if sowStart == 'auto':
        # read sow start for each location
        sql = "SELECT point_id, sow_start FROM gridPoints"
        sowDates = psql.read_frame(sql, masterDbConn, index_col='point_id')
    else:
        # set sow start the same for each location
        sql = "SELECT point_id FROM gridPoints"
        gridPoints = psql.read_frame(sql, masterDbConn)
        sowDates = pandas.DataFrame([sowStart] * len(gridPoints),
                                    index=gridPoints['point_id'])

    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')

    # read and convert to yearly formatted data
    apsimData = _apsim_output(apsimDbPath, sowDates)

    # add column with runId
    runIdSeries = pandas.Series([runId] * len(apsimData))
    apsimData['run_id'] = runIdSeries

    # write runData to master database
    psql.write_frame(apsimData, 'apsimOutput', masterDbConn, if_exists='append')
def get_val_from_uc_db(query):
    try:
        result = sql.read_frame(query, conn)
        return result.values[0][0]
    except:
        # Reconnect and retry once if the cached connection is missing or stale.
        conn = psycopg2.connect(conn_string)
        cur = conn.cursor()
        orca.add_injectable('uc_conn', conn)
        orca.add_injectable('uc_cur', cur)
        result = sql.read_frame(query, conn)
        return result.values[0][0]
def get_val_from_uc_db(query):
    try:
        result = sql.read_frame(query, conn)
        return result.values[0][0]
    except:
        conn = psycopg2.connect(conn_string)
        cur = conn.cursor()
        sim.add_injectable('uc_conn', conn)
        sim.add_injectable('uc_cur', cur)
        result = sql.read_frame(query, conn)
        return result.values[0][0]
def readDFFromDB(self, table_name, limit=None):
    if not limit:
        df = pd_sql.read_frame('select * from %s' % table_name,
                               self._connection)
    else:
        df = pd_sql.read_frame('select * from %s limit %s' % (table_name, limit),
                               self._connection)
    try:
        df.index = pd.DatetimeIndex(df['date'])
        df.pop('date')
    except:
        self._log.error('** Creating dataframe index from sqlite read')
    return df
def get_val_from_uc_db(query):
    try:
        result = sql.read_frame(query, conn)
        return result.values[0][0]
    except:
        conn = psycopg2.connect(conn_string)
        cur = conn.cursor()
        orca.add_injectable('uc_conn', conn)
        orca.add_injectable('uc_cur', cur)
        result = sql.read_frame(query, conn)
        result2 = sql.read_frame("select column_name from Information_schema.columns where table_name like 'building' ", conn)
        print result2
        return result.values[0][0]
def load(self, product, **kwargs):
    dnow = datetime.datetime.now()
    fsettle = self.DATA + 'settle/' + product.lower() + '.sql'
    flive = self.DATA + 'live/' + product.lower() + '.sql'
    conn = sqlite3.connect(fsettle,
                           detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    cur = conn.cursor()
    cur.execute('ATTACH "%s" AS live' % (flive))

    # build the basic query
    #query = 'SELECT timestamp as "[timestamp]" FROM %s'
    query = 'SELECT * FROM %s'
    conj = ' WHERE '
    if 'start' in kwargs:
        query += (conj + 'timestamp >= "%s"' % kwargs.get('start'))
        conj = ' AND '
    if 'end' in kwargs:
        query += (conj + 'timestamp < "%s"' % kwargs.get('end'))
        conj = ' AND '
    query += ' ORDER BY timestamp'

    self.opt_settle = sql.read_frame(query % 'options', conn)
    self.opt_settle['timestamp'] = self.opt_settle['timestamp'].apply(Timestamp)
    self.opt_settle['month'] = self.opt_settle['month'].apply(str)
    self.opt_settle.set_index(['timestamp', 'month', 'strike'], inplace=True)

    self.fut_settle = sql.read_frame(query % 'futures', conn)
    self.fut_settle['timestamp'] = self.fut_settle['timestamp'].apply(Timestamp)
    # How you multi-index depends on how you are going to use it. For a
    # timeseries of ERM4 you want to index by ['mon', 'timestamp'], while for
    # looking at the evolution of the curve it would be timestamp, month.
    # Note the need-for-sortedness too.
    self.fut_settle.set_index(['mon', 'timestamp'], inplace=True)
    self.fut_settle.sortlevel(0, inplace=True)

    # Do we need this? Would have been better to do it when snapping.
    if not self.valid_database('live.sqlite_master', cur):
        self.opt_live = None
        self.fut_live = None
        return

    self.fut_live = sql.read_frame(query % 'live.futures', conn)
    self.fut_live['timestamp'] = self.fut_live['timestamp'].apply(Timestamp)
    self.fut_live.set_index(['mon', 'timestamp'], inplace=True)
    self.fut_live.sortlevel(0, inplace=True)

    self.opt_live = sql.read_frame(query % 'live.options', conn)
    self.opt_live['timestamp'] = self.opt_live['timestamp'].apply(Timestamp)
    self.opt_live['month'] = self.opt_live['month'].apply(str)
    self.opt_live.set_index(['timestamp', 'month', 'strike'], inplace=True)

    conn.close()
def _read_from(self, table, conn):
    """ Read from table """
    get_all = "SELECT * FROM "
    sql_type = self.sql_type
    if sql_type == "PG":
        dframe = sql.read_frame(get_all + '"' + table + '"', conn)
    else:
        dframe = sql.read_frame(get_all + '"' + table + '"', conn)
    return dframe
def bus_sql_compare():
    '''
    bus_cols = ['Bus_Num', 'Bus_Name', 'Bus_kV', 'Code', 'Area_Num',
                'Zone_Num', 'Owner_Num', 'Voltage_pu', 'Angle']
    '''
    global db

    # Busses in both files, but Bus_Name changed
    print('  Bus name changes')
    sql_str = '''
        select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
            , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
            , 'name' as Change_type
        from bus1, bus2
        where bus1.Bus_Num = bus2.Bus_Num
            and bus1.Bus_Name != bus2.Bus_Name
        '''
    bus_diff = sql.read_frame(sql_str, db)

    # Busses only in file 1
    print('  busses dropped')
    sql_str = '''
        select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
            , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
            , 'name' as Change_type
        from bus1
        LEFT JOIN bus2 ON bus1.Bus_Num = bus2.Bus_Num
        WHERE bus2.Bus_Num is NULL
        '''
    bus_drop = sql.read_frame(sql_str, db)
    # concat results
    bus_diff = bus_diff.append(bus_drop)

    # Busses only in file 2
    print('  busses added')
    sql_str = '''
        select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
            , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
            , 'name' as Change_type
        from bus2
        LEFT JOIN bus1 ON bus1.Bus_Num = bus2.Bus_Num
        WHERE bus1.Bus_Num is NULL
        '''
    bus_add = sql.read_frame(sql_str, db)
    # concat results
    bus_diff = bus_diff.append(bus_add)

    return bus_diff
def parse_data(origin, destination, delta_t=7, verbose=True):
    '''
    Reads data from flight_data.db for a given origin, destination pair and
    generates / transforms all the data fields necessary for analysis.

    Data Name Dictionary:
    QDATE: Query Date
    CXR: Airline
    DFLIGHT: Flight Number
    DTIME: Local Departure Time
    DTD: Days to Departure
    QDAY: Query Day of Week
    DDAY: Departure Day of Week
    DCHUNK: Departure Time of Day e.g. morning, afternoon etc.
    DMONTH: Departure Month
    FARE: Current Fare Price in USD
    NFARE: Normalized Fare Price (see process_fare for more information)
    DFARE: Binary representation of forward difference in fare price where 1
        signifies an increase and 0 a decrease
    DPFARE_i: Percent trailing difference in fare price back i periods
    '''
    if verbose:
        print 'Reading data from flight_data.db...'
    market = ', '.join(["".join(["'", origin, destination, "'"])]
                       + ["".join(["'", destination, origin, "'"])])
    cnx = sqlite3.connect('flight_data_3.db')
    query = 'SELECT QDATE, CXR, DFLIGHT, DDATE, DTIME, FARE from flightdata WHERE MARKET in (%s)' % market
    df = sql.read_frame(query, cnx)

    if verbose:
        print 'Processing Results'
    airlines = list(set(df['CXR']))
    air_code_dict = {airline: i for i, airline in enumerate(airlines)}
    df['CXR'] = df['CXR'].apply(lambda x: air_code_dict[x])
    df['QDATE'] = df['QDATE'].apply(lambda x: dateParser((str(int(x))), 0))
    df['DDATE'] = df['DDATE'].apply(lambda x: dateParser(x, 1))

    if verbose:
        print 'Computing Days till Departure'
    df['DTD'] = df[['DDATE', 'QDATE']].apply(day_diff, axis=1)
    df['DTIME'] = df['DTIME'].apply(lambda x: int(x[:2]))
    df['QDAY'] = df['QDATE'].apply(lambda x: x.weekday())
    df['DDAY'] = df['DDATE'].apply(lambda x: x.weekday())
    df['DCHUNK'] = df['DTIME'].apply(time_of_day)
    df['DMONTH'] = df['DDATE'].apply(lambda x: x.strftime('%m'))
    fare_dict = gen_fare_dict(df)

    if verbose:
        print 'Computing Fare Differences'
    df['DFARE'] = df.apply(lambda x: get_fare_delta(x, delta_t, fare_dict), axis=1)
    df['DPFARE_1'] = df.apply(lambda x: get_fare_delta(x, delta_t, fare_dict,
                                                       r_type='percent', time='past'), axis=1)
    df['DPFARE_2'] = df.apply(lambda x: get_fare_delta(x, delta_t, fare_dict,
                                                       r_type='percent', time='past'), axis=1)

    if verbose:
        print 'Normalizing Fare Prices'
    df['NFARE'] = process_fare(df, verbose)

    if verbose:
        print 'Dropping null values'
    return df.dropna()
def run(self):
    import pandas.io.sql as sql

    upstreamDistance = self.i.upstreamDistance.val
    downstreamDistance = self.i.downstreamDistance.val

    if self.i.species_genome.val == "mm9":
        srmotifs_tablename = "mm9_gene_srmotifs_detail"
    elif self.i.species_genome.val == "hg19":
        srmotifs_tablename = "hg19_gene_srmotifs_detail"

    query = """
        select genename, chrom,
               txstart-motif_start as tx_start_dist,
               cdsstart-motif_start as cds_start_dist,
               txend-motif_start as tx_end_dist,
               txend-motif_end as tx_end_motif_end_dist,
               motif_end-motif_start,
               gene_strand, motif_strand, motif_name, motif_score
        FROM %(srmotifs_tablename)s
        WHERE motif_score >= 0.7
          AND (
                ( gene_strand = '+'
                  AND (motif_start > (txstart - %(upstreamDistance)d)
                       and motif_start < (txend + %(downstreamDistance)d)) )
                OR
                ( gene_strand = '-'
                  AND (motif_end > (txstart - %(downstreamDistance)d)
                       and motif_end < (txend + %(upstreamDistance)d)) )
              )
        """ % {
        "upstreamDistance": upstreamDistance,
        "downstreamDistance": downstreamDistance,
        "srmotifs_tablename": srmotifs_tablename,
    }

    self.o.mm9_gene_motifs.val = sql.read_frame(query, conn)
def predict(self, predict_table_name, actual_label_col=''):
    '''
    Return predicted values using the trained model. Also return precision,
    recall & f-measure.

    Input:
    ======
    predict_table_name : (String) the name of the table to be used for prediction
    actual_label_col : (String) the name of the actual label column (will be
        ignored if empty)

    Output:
    =======
    A dataframe of the prediction results
    '''
    # Transform the columns if any of them are categorical.
    predict_table_name, _indep, _dep, _discard = pivotCategoricalColumns(
        self.dbconn,
        predict_table_name,
        self.model['indep_org'],
        actual_label_col,
        self.model['col_distinct_vals_dict']
    )
    stmt = '''
        select *,
               {madlib_schema}.array_dot(array{coef}::real[], {indep}) as prediction
        from {table_name}
        '''.format(
        coef=self.model['coef'],
        indep=self.model['indep'],
        table_name=predict_table_name,
        madlib_schema=self.dbconn.getMADlibSchema()
    )
    prediction_results = psql.read_frame(stmt, self.dbconn.getConnection())
    return prediction_results
def get_qwop(connection, dateBeg, dateEnd, company):
    '''Quantity weighted offer price query, from Ramu'''
    q = """Select com.Fp_Offers.DTTM_ID, com.Fp_Offers.Trading_DATE as 'Date',
                  com.Fp_Offers.Trading_Period as 'TP',
                  com.MAP_PNode_to_POC_and_Island.Island,
                  com.MAP_Participant_names.Parent_Company_ID,
                  (Sum((com.Fp_Offers.Offer_Price * com.Fp_Offers.Offer_Quantity)) /
                   Sum(com.Fp_Offers.Offer_Quantity)) As 'QWOP'
           From com.Fp_Offers
           Inner Join com.MAP_Participant_names
               On com.Fp_Offers.Trader_Id = com.MAP_Participant_names.Trader_Id
           Inner Join com.MAP_PNode_to_POC_and_Island
               On com.Fp_Offers.PNode = com.MAP_PNode_to_POC_and_Island.PNode
           Where com.Fp_Offers.Trading_DATE >= '%s'
               And com.Fp_Offers.Trading_DATE <= '%s'
               And com.Fp_Offers.trade_type = 'ENOF'
               And com.MAP_Participant_names.Parent_Company_ID = '%s'
               And com.MAP_PNode_to_POC_and_Island.Island = 'SI'
           Group By com.Fp_Offers.DTTM_ID, com.Fp_Offers.Trading_DATE,
                    com.Fp_Offers.Trading_Period,
                    com.MAP_PNode_to_POC_and_Island.Island,
                    com.MAP_Participant_names.Parent_Company_ID
           order by com.Fp_Offers.DTTM_ID""" % (dateBeg.strftime("%Y-%m-%d"),
                                                dateEnd.strftime("%Y-%m-%d"),
                                                company)
    t = sql.read_frame(q, connection, coerce_float=True)
    t['Date'] = t['Date'].map(lambda x: date_converter(x))
    t = t.set_index(['Date', 'TP']).QWOP
    return t
def get_rm_demand(connection, dateBeg, dateEnd, company):
    '''rm demand by parent company, from Ramu'''
    q = """Select com.RM_Demand_by_trader.DTTM_ID, com.RM_Demand_by_trader.Trading_Date,
                  com.RM_Demand_by_trader.Trading_Period,
                  com.MAP_Participant_names.Parent_Company_ID,
                  com.MAP_NSP_POC_to_region.ISLAND,
                  Sum(com.RM_Demand_by_trader.RM_demand) As 'RMLoad'
           From com.RM_Demand_by_trader
           Inner Join com.MAP_Participant_names
               On com.RM_Demand_by_trader.Trader_ID = com.MAP_Participant_names.Trader_Id
           Inner Join com.MAP_NSP_POC_to_region
               On com.RM_Demand_by_trader.POC = com.MAP_NSP_POC_to_region.POC
           Where com.RM_Demand_by_trader.Trading_Date >= '%s'
               And com.RM_Demand_by_trader.Trading_Date <= '%s'
               And com.MAP_Participant_names.Parent_Company_ID = '%s'
           Group By com.RM_Demand_by_trader.DTTM_ID, com.RM_Demand_by_trader.Trading_Date,
                    com.RM_Demand_by_trader.Trading_Period,
                    com.MAP_Participant_names.Parent_Company_ID,
                    com.MAP_NSP_POC_to_region.ISLAND
           Order By com.RM_Demand_by_trader.DTTM_ID,
                    com.MAP_NSP_POC_to_region.ISLAND""" % (dateBeg.strftime("%Y-%m-%d"),
                                                           dateEnd.strftime("%Y-%m-%d"),
                                                           company)
    t = sql.read_frame(q, connection, coerce_float=True)
    return t
def getAllDataFrame(sym, startdate, dbconn):
    print "getAllDataFrame", sym, startdate
    sqlstr = "select * from stockeod where symbol='%s' and sdate>='%s'" % (sym, startdate)
    #cursor = dbconn.cursor()
    try:
        tb = ""
        #cursor.execute(sql)
        df = sql.read_frame(sqlstr, dbconn)
        #results = cursor.fetchall()
        #df = DataFrame(cursor.fetchall())
        #df.columns = cursor.keys()
        return df
        #import MySQLdb as mdb
        #from pandas import *
        #conn = mdb.connect('<server>','<user>','<pass>','<db>');
    except:
        print "error"
        tb = traceback.format_exc()
        print tb
        return
def insert_statice():
    conn = MySQLdb.Connect(host='localhost', user='******', passwd='123456',
                           db='cqlianjia_daily_monitoring', charset='utf8')
    cur = conn.cursor()
    #cur.execute('INSERT INTO signal_item_statics (date) VALUE (CURDATE());')
    currentdate = time.strftime('%Y_%m_%d', time.localtime(time.time()))
    sqlname = 'houseprice_' + currentdate
    cqpd = sql.read_frame('select area,price,view,unitprice from %s' % sqlname, conn)

    items = ['avr unit price_0',
             'avr total price_1',
             'total house quantity_2',
             'total house area_3',
             'MID unit price_4',
             'MID total price_5',
             'house quantity viewed_6',
             'AVR viewed times_7']
    avr = cqpd.mean()
    mid = cqpd.median(axis=0)
    sumpd = cqpd.sum()
    items[0] = sumpd[1] / sumpd[0] * 10000
    items[1] = avr[1]
    items[2] = len(cqpd)
    items[3] = sumpd[0]
    items[4] = mid[3]
    items[5] = mid[1]
    items[6] = len(cqpd[cqpd.view != 0])
    items[7] = cqpd[cqpd.view != 0].sum()[2] / items[6]

    cur.execute("insert into signal_item_statics values(CURDATE(),%s,%s,%s,%s,%s,%s,%s,%s)",
                items)
    conn.commit()
    cur.close()
    return
def init_hops():
    g.db = connect_db()
    hopdf = sql.read_frame('select * from hop where id<63', g.db, 'name')
    hopdf = hopdf[['alpha', 'beta', 'humulene', 'caryophyllene', 'cohumulone',
                   'myrcene', 'notes', 'origin', 'substitutes']]
    hopdf.to_csv('hopstr/static/hops.csv', index=True,
                 cols=['alpha', 'beta', 'humulene', 'caryophyllene',
                       'cohumulone', 'myrcene'],
                 header=['alpha', 'beta', 'humulene', 'caryophyllene',
                         'cohumulone', 'myrcene'])
    g.db.close()
    return hopdf
def read_raw_featurs_from_DB(con):
    fields = ['twitter.links',
              'twitter.user.verified',
              'twitter.user.listed_count',
              'twitter.text',
              'twitter.mentions',
              'twitter.mention_ids',
              'klout.score',
              'twitter.hashtags',
              'twitter.user.statuses_count',
              'twitter.user.followers_count',
              'twitter.user.friends_count',
              'twitter.user.geo_enabled',
              'language.confidence',
              'twitter.user.lang',
              'twitter.created_at',
              'twitter.user.created_at',
              'Newsworthy']
    fieldsConc = '[' + '],['.join(fields) + ']'
    sqlQuery = ("SELECT md.match_rowid , {} FROM MasterData AS md "
                "JOIN AnnotationsBatch4 AS an ON md.match_rowid = an.match_rowid "
                "WHERE an.Newsworthy IS NOT NULL").format(fieldsConc)
    data = psql.read_frame(sqlQuery, con)
    return data
def _read_apsim_db(apsimDbConn, start, chunksize):
    '''
    Read apsimData.sqlite database.

    Parameters
    ----------
    apsimDbConn : sqlite connection object
        connection to database
    start : int
        where to start limiting the data returned
    chunksize : int
        size of chunks to read from the database

    Returns
    -------
    A dataframe of daily data.
    '''
    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
        outputFields = list(outputFields['name'])
        outputFields = ', '.join(outputFields)

        # read main data
        sql = "SELECT point_id, {outputFields} FROM apsimOutput LIMIT {start}, {chunksize}".format(
            outputFields=outputFields, start=start, chunksize=chunksize)
        dailyData = pandas.io.sql.read_frame(sql, apsimDbConn)

    return dailyData
def update_output_fields_table(masterDbConn, runPath):
    '''
    Updates the outputFields table in the master run database. If a field
    already exists it is skipped, otherwise it is added.

    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a
        particular run

    Returns
    -------
    A list of fields that were updated in the table.
    '''
    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')

    # open run database
    apsimDbConn = lite.connect(apsimDbPath)
    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)

    with masterDbConn:
        # write outputFields to master database
        try:
            psql.write_frame(outputFields, 'outputFields', masterDbConn)
        except ValueError:  # as e:
            # if table already exists then do nothing
            #print '*** Warning: {} Skipping write.'.format(e)
            pass
def get_rm_generation(connection, dateBeg, dateEnd, company):
    '''rm generation by parent company, from Ramu'''
    q = """Select com.RM_Generation_by_trader.DTTM_ID, com.RM_Generation_by_trader.POC,
                  com.MAP_Participant_names.Parent_Company_ID,
                  Sum(com.RM_Generation_by_trader.RM_generation) As 'RMGen'
           From com.MAP_Participant_names
           Inner Join com.RM_Generation_by_trader
               On com.RM_Generation_by_trader.Trader_ID = com.MAP_Participant_names.Trader_Id
           Where com.RM_Generation_by_trader.Trading_Date >= '%s'
               And com.RM_Generation_by_trader.Trading_Date <= '%s'
               And com.MAP_Participant_names.Parent_Company_ID Like '%s'
           Group By com.RM_Generation_by_trader.DTTM_ID, com.RM_Generation_by_trader.POC,
                    com.MAP_Participant_names.Parent_Company_ID
           Order By com.RM_Generation_by_trader.DTTM_ID,
                    com.MAP_Participant_names.Parent_Company_ID,
                    com.RM_Generation_by_trader.POC""" % (dateBeg.strftime("%Y-%m-%d"),
                                                          dateEnd.strftime("%Y-%m-%d"),
                                                          company)
    t = sql.read_frame(q, connection, coerce_float=True)
    return t
def fetchHistData(codearg, namearg, symbol, startdate):
    print 'fetchHistData'
    dbname = 'hist_db_' + codearg + '_' + namearg + '.sqlite'
    con = sqlite3.connect("../data/hist/" + dbname)
    query = "SELECT * FROM sqlite_master WHERE type='table'"
    df = pd.io.sql.read_frame(query, con)
    tablelen = len(df)
    print 'hist tablelen:', tablelen

    tablename = 'result_' + codearg + '_' + namearg
    histdf = pd_sql.read_frame("SELECT * from " + tablename, con)

    from pandas.lib import Timestamp
    histdf.Date = histdf.Date.apply(Timestamp)
    histdf2 = histdf.set_index('Date')
    histdf2 = histdf2[histdf2.index >= startdate]
    # histdf.index = histdf['Date']
    # histdf = histdf.drop('Date',1)

    print 'histdf from db:'
    print histdf2.head()
    print histdf2.tail()
    print 'hist index type:', type(histdf2.index)
    con.close()
    return histdf2
def __init__(self):
    secrets_file = open('secrets.json', 'rb')
    secrets = json.load(secrets_file)
    secrets_file.close()

    self.blog_name = "wheredidmypostgo"

    # Build an Authorized Tumblr Client
    self.tb_client = pytumblr.TumblrRestClient(**secrets['tumblr_tokens'])
    self.etl_controller = app.etl_controller()

    max_end_date = date.today() - timedelta(days=3)
    sql = """
        select blog_name, avg(ClosenessCentrality) as 'ClosenessCentrality'
        from tb_reblog_graphs
        where reblogged_root_name in (%s)
            and end_date > '%s'
            and blog_name not in ('wheredidmypostgo', %s)
        group by blog_name
        order by avg(ClosenessCentrality) DESC
        """ % ("'" + "','".join(self.etl_controller.target_blogs) + "'",
               max_end_date.isoformat(),
               "'" + "','".join(self.etl_controller.target_blogs) + "'")
    self.influencer_df = psql.read_frame(sql, self.etl_controller.mysql_connection)
    self.influencer_df['pdf'] = (self.influencer_df.ClosenessCentrality /
                                 self.influencer_df.ClosenessCentrality.sum())
    self.influencer_df['cdf'] = self.influencer_df.sort(column='pdf', ascending=False).pdf.cumsum()

    sql = """
        select tag
        from tb_posts
        inner join tb_posttag_level on tb_posttag_level.`post_id` = tb_posts.id
        where tb_posts.blog_name = 'wheredidmypostgo'
        """
    curs = self.etl_controller.mysql_connection.cursor()
    curs.execute(sql)
    all_tags = curs.fetchall()
    self.most_common_tags = [t[0] for t in Counter(all_tags).most_common(n=200)]
    curs.close()

    response = self.tb_client.posts('wheredidmypostgo', notes_info='true')
    self.posts = response['posts']
    for offset in range(20, response['total_posts'], 20):
        response = self.tb_client.posts('wheredidmypostgo', notes_info='true',
                                        offset=offset)
        self.posts.extend(response['posts'])

    self.notes = []
    for p in self.posts:
        if p['note_count'] > 0:
            self.notes.extend(p['notes'])
    self.notes_df = pd.DataFrame(self.notes)
    self.notes_df['date'] = self.notes_df.timestamp.apply(float).apply(datetime.fromtimestamp)
    self.todays_notes = self.notes_df[
        self.notes_df.date >= (datetime.now() - timedelta(hours=4))
    ].sort(column='date', ascending=False).head(50)
def GetRowsDataFrameFromSelect(self, SelectSql):
    conn = self.Connect(as_dict=False)
    print SelectSql
    df = sql.read_frame(SelectSql, conn)
    print '  Rows read = ' + str(df.shape[0])
    conn.close()
    return df
def get(self, lowerupper):
    lower, upper = lowerupper.split(":")
    print lower, upper
    lower = str(int(lower) + 1)
    conn = get_db()
    sql = "SELECT * FROM signal WHERE ROWID >= %s AND ROWID < %s" % (lower, upper)
    df = psql.read_frame(sql, conn)
    return jsonify(df.to_dict())
def data_firm():
    query = '''
        select firm_name, website_url
        from investor_company_url
        where result_rank = 0;'''
    url_data = psql.read_frame(query, conn)
    url_json = url_data.to_json()
    url_json_load = json.loads(url_json)
    return json.dumps(url_json_load)
def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)
def updateRangeInfo(self, seriesNames, logfile=None):
    '''
    Updates the range info in SeriesList so that it matches what is in the db.
    Should be run before/after importing new series.
    '''
    errortables = []
    if logfile is not None:
        logging.basicConfig(filename=logfile, filemode='w', level=logging.ERROR)

    self.connect()
    for name in seriesNames:
        logging.info("updateRangeInfo for {}".format(name))

        # Read series data range
        sqlRead = "SELECT Date FROM {0}".format(name)
        try:
            dates = psql.read_frame(sqlRead, con=self.con)
        except Exception as e:
            errortables.append(name)
            logging.error("updateRangeInfo: Reading table, encountered error <<{0}>>".format(e))
            continue

        # Convert to datetime objects
        dates = dates.apply(pd.to_datetime)
        StartRange = dates.min().iloc[0]  # still a Series object, so have to get the value
        EndRange = dates.max().iloc[0]

        # Construct sql update query
        sqlWrite = "UPDATE SeriesList SET StartRange = '{0}', ".format(StartRange)
        sqlWrite += "EndRange = '{0}' ".format(EndRange)
        sqlWrite += "WHERE SeriesName = '{0}';".format(name)
        #print sqlWrite
        cur = self.con.cursor()
        try:
            cur.execute(sqlWrite)
        except Exception as e:
            logging.error("updateRangeInfo: Error executing write dates, encountered error <<{0}>>".format(e))
            errortables.append(name)
            continue
        else:
            self.con.commit()

    self.disconnect()
    return errortables
def plot():
    consqlite = sqlite3.connect('d:\\DataMining\\Tesis\\DATASET SUBE\\base.db')
    consqlite.text_factory = str
    df = sql.read_frame(
        'SELECT MINDISTANCIA from MINDISTANCIA WHERE LINEAMT = 114 AND RAMALMT = 361 AND RAMALGEO = "B" ',
        consqlite)
    df = filtroDesvioStd(df, 'MINDISTANCIA', 6)
    hist([val[0] for val in df.values], 100, (0, 200))

    # BOXPLOT
    consqliteBase = sqlite3.connect('base.db')
    consqliteBase.text_factory = str
    df = sql.read_frame(
        'SELECT A.*, B.AREAGEOGRAFICA FROM LINEASTRXGEO A, (SELECT DISTINCT LINEA, RAMAL, AREAGEOGRAFICA FROM LINEARAMALTRX) B WHERE A.LINEAMT = B.LINEA AND A.RAMALMT = B.RAMAL',
        consqliteBase)
    df = sql.read_frame('SELECT A.* FROM LINEASTRXGEO A ', consqliteBase)

    figure()
    boxplot(df['RMSE'], 0, 'gD', 0)
    boxplot(df['RMSE'], 0, '', 0)
    boxplot(df[df['AREAGEOGRAFICA'] == '1']['RMSE'], 0)
    #%pylab
    boxplot(df[df['AREAGEOGRAFICA'] == '12']['RMSE'].values, 0)
    boxplot(df[df['AREAGEOGRAFICA'] == '13']['RMSE'].values, 0)
    boxplot([df[df['AREAGEOGRAFICA'] == '1']['RMSE'],
             df[df['AREAGEOGRAFICA'] == '12']['RMSE'],
             df[df['AREAGEOGRAFICA'] == '13']['RMSE']], 0, 'gx', 0)
    boxplot([df[df['AREAGEOGRAFICA'] == '1']['RMSE'],
             df[df['AREAGEOGRAFICA'] == '12']['RMSE'],
             df[df['AREAGEOGRAFICA'] == '13']['RMSE']], 0, '')
    hist(df['RMSE'], 20)
def analyze(sqlite_path):
    con = sql.connect(sqlite_path)
    df = pd_sql.read_frame(
        "SELECT user_id, artist, timestamp FROM tracks LIMIT 1000",
        con, index_col='timestamp')
    df.index = pd.to_datetime(df.index.values * 1e9)
    uac_df = df.groupby(['user_id', 'artist']).resample('M', how='count')
    #uac_df.reindex(pd.date_range(min(uac_df.index), max(uac_df.index)))
    print uac_df.head(10)
def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(range(len(frame2))) + 10
    sql.write_frame(frame2, name='test_table2', con=self.db)
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(range(len(frame2))) + 10
    tm.assert_frame_equal(expected, result)
def get_raw_count_for_all(table, valid_criteria):
    query = """
        SELECT count(*) as count
        FROM package, %s t
            inner join unit_location ul on ul.id = t.location_id
        WHERE ul.package_id = package.id
            and target like '%%ispmon.samknows.%%.measurement-lab.org'
            %s;
        """ % (table, valid_criteria)
    return psql.read_frame(query, db)
def get_raw_tests_for_host(hostname, table, column, valid_criteria):
    query = """
        SELECT dtime as date, date(dtime) as day, ul.package_id as package_id, %s
        FROM package, %s t
            inner join unit_location ul on ul.id = t.location_id
        WHERE ul.package_id = package.id
            and target like '%%ispmon.samknows.%s.measurement-lab.org'
            %s;
        """ % (column, table, hostname, valid_criteria)
    return psql.read_frame(query, db)
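# get_raw_count_for_all / get_raw_tests_for_host above splice every argument
# straight into the SQL text. For value-like arguments (here, the hostname
# inside the LIKE pattern) a bound parameter is safer; the table name still
# has to be interpolated. A hypothetical sketch of that variant using the
# modern pandas.read_sql API and the same `db` connection assumed above
# (placeholder style depends on the DB driver; %(name)s works for
# psycopg2/MySQLdb):
import pandas as pd


def get_raw_tests_for_host_param(hostname, table, db):
    # Build the LIKE pattern in Python and bind it as a query parameter.
    pattern = '%ispmon.samknows.{}.measurement-lab.org'.format(hostname)
    query = """
        SELECT dtime as date, date(dtime) as day, ul.package_id as package_id
        FROM package, {table} t
            inner join unit_location ul on ul.id = t.location_id
        WHERE ul.package_id = package.id
            and target like %(pattern)s;
        """.format(table=table)
    return pd.read_sql(query, db, params={'pattern': pattern})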
def main():
    con = MySQLdb.connect(host="bigblasta.chiim1n4uxwu.eu-central-1.rds.amazonaws.com",
                          user="******", passwd="Jo27051980", db="bigblasta")
    cursor = con.cursor()
    df = sql.read_frame('select t2.tid, t2.aid from (select tid from start_tracks order by rand() limit 1000)t1 inner join tracks t2 on t1.tid = t2.tid group by 1,2', con)
    subset = df[['tid', 'aid']]
    start_tracks = [tuple(x) for x in subset.values]
    if len(start_tracks) > 0:
        # create a client object with your app credentials
        from multiprocessing.dummy import Pool as ThreadPool
        pool = ThreadPool(1)
        pool.map(processTrackEcho, start_tracks)
        pool.close()
def _get_avg_data(apsimDbConn, pointDailyData, harvestDates, sowDate):
    '''
    Determines seasonal averages for data.

    Parameters
    ----------
    apsimDbConn : sqlite connection object
        connection to database
    dailyData : pandas dataframe
        daily data values, indexed by date
    harvestDates : pandas dataframe
        string date of harvesting, indexed by year
    sowDate : string
        date of sowing (dd-mmm)

    Returns
    -------
    Dataframe of yearly average data (rain, mint, maxt, radn, and irr_fasw).
    '''
    # get unique years from data
    years = np.unique(pointDailyData.index.year)

    # convert sowDate to correct format
    sowDate = strptime(sowDate, '%d-%b')

    # read data from the outputFields table
    with apsimDbConn:
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
    outputFields = list(outputFields['name'])
    outputFields.remove('date')
    outputFields.remove('yield')

    yearlyAvgData = pandas.DataFrame({})
    for field in outputFields:
        dataAvgs = {}
        for year in years:
            harvestDate = harvestDates[year]
            # check if harvestDate is a string
            if type(harvestDate) == type(''):
                rng = pandas.date_range('{0}/{1}/{2}'.format(sowDate.tm_mon,
                                                             sowDate.tm_mday,
                                                             year),
                                        harvestDate)
                # get the avg values and add to dataAvgs dictionary
                pointDailyDataMean = pointDailyData[field].ix[rng].mean()
                dataAvgs[year] = pointDailyDataMean
            else:
                # if harvestDate is not a string, set as NaN
                dataAvgs[year] = np.nan
        #print dataAvgs
        yearlyAvgData[field] = pandas.Series(dataAvgs)
        #print yearlyAvgData[field].head()

    return yearlyAvgData
def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK! Change this once indexes are handled properly.
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(lrange(len(frame2))) + 10
    sql.write_frame(frame2, name='test_table2', con=self.db)
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(lrange(len(frame2))) + 10
    expected.index.name = 'Idx'
    print(expected.index.names)
    print(result.index.names)
    tm.assert_frame_equal(expected, result)