def test_if_exists(self):
    _skip_if_no_MySQLdb()
    df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
    df_if_exists_2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
    table_name = 'table_if_exists'
    sql_select = "SELECT * FROM %s" % table_name

    def clean_up(test_table_to_drop):
        """
        Drops tables created from individual tests
        so no dependencies arise from sequential tests
        """
        if sql.table_exists(test_table_to_drop, self.db, flavor='mysql'):
            cur = self.db.cursor()
            cur.execute("DROP TABLE %s" % test_table_to_drop)
            cur.close()

    # test if invalid value for if_exists raises appropriate error
    self.assertRaises(ValueError,
                      sql.write_frame,
                      frame=df_if_exists_1,
                      con=self.db,
                      name=table_name,
                      flavor='mysql',
                      if_exists='notvalidvalue')
    clean_up(table_name)

    # test if_exists='fail'
    sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                    flavor='mysql', if_exists='fail')
    self.assertRaises(ValueError,
                      sql.write_frame,
                      frame=df_if_exists_1,
                      con=self.db,
                      name=table_name,
                      flavor='mysql',
                      if_exists='fail')

    # test if_exists='replace'
    sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                    flavor='mysql', if_exists='replace')
    self.assertEqual(sql.tquery(sql_select, con=self.db),
                     [(1, 'A'), (2, 'B')])
    sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name,
                    flavor='mysql', if_exists='replace')
    self.assertEqual(sql.tquery(sql_select, con=self.db),
                     [(3, 'C'), (4, 'D'), (5, 'E')])
    clean_up(table_name)

    # test if_exists='append'
    sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                    flavor='mysql', if_exists='fail')
    self.assertEqual(sql.tquery(sql_select, con=self.db),
                     [(1, 'A'), (2, 'B')])
    sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name,
                    flavor='mysql', if_exists='append')
    self.assertEqual(sql.tquery(sql_select, con=self.db),
                     [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
    clean_up(table_name)

def update_output_fields_table(masterDbConn, runPath):
    '''
    Updates the outputFields table in the master run database. If a field
    already exists it is skipped, otherwise it is added.

    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a
        particular run

    Returns
    -------
    Nothing.
    '''

    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')

    # open run database
    apsimDbConn = lite.connect(apsimDbPath)

    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)

    with masterDbConn:
        # write outputFields to master database
        try:
            psql.write_frame(outputFields, 'outputFields', masterDbConn)
        except ValueError:  # as e:
            # if the table already exists then do nothing
            # print '*** Warning: {} Skipping write.'.format(e)
            pass

def save_repos(count=1000000):
    """
    Fetch the list of public repos from the GitHub API. The count is the
    number of requests to the API; each request returns 100 repos.

    To 'resume' this after already adding records to the table, add ?since=x
    to the URL, where x is the last ID in your table.

    I'm only saving 6 variables right now because most of the variables
    returned are URLs that follow a set structure and therefore could easily
    be built from the full_name.
    """
    con = MySQLdb.connect("localhost", USER, PASSWORD, "git", charset='utf8')
    url = 'https://api.github.com/repositories'
    for x in xrange(1, count):
        req = requests.get(url, auth=(USER, PASSWORD))
        url = req.links['next']['url']
        df_temp = pn.DataFrame()
        if req.ok:
            repoItem = req.json
            repos_df_temp = pn.DataFrame.from_dict(repoItem)
            df_temp = repos_df_temp[['id', 'name', 'private', 'full_name',
                                     'description', 'fork']]
            df_temp = df_temp.fillna('')
            sql.write_frame(df_temp, con=con, name='repos',
                            if_exists='append', flavor='mysql')
        print 'fetched 100 rows'
        time.sleep(1.0)
    return df_temp

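# A minimal sketch (not from the snippet above) of the "resume" pattern the
# save_repos docstring describes: look up the highest repository id already
# stored in the `repos` table and restart the crawl from there via GitHub's
# `since` parameter. The table/column names (`repos`, `id`) come from the
# snippet; the helper name `resume_url` is hypothetical.
def resume_url(con):
    """Return the API URL to resume fetching after the last saved repo id."""
    cur = con.cursor()
    cur.execute("SELECT MAX(id) FROM repos")
    last_id = cur.fetchone()[0] or 0  # empty table -> start from the beginning
    cur.close()
    return 'https://api.github.com/repositories?since=%d' % last_id
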
def save_db(df, genedb_path):
    """Saves the data into the gene_features table.

    If the table already exists, the table is dropped and then re-inserted.

    **Parameters**

    df : pd.DataFrame
        data to insert into DB table
    genedb_path : str
        path to sqlite db
    """
    logger.debug('Dropping gene_features table IF EXISTS.')
    _utils.drop_table('gene_features', genes_db_path=genedb_path, kind='sqlite')  # drop table if exists
    logger.debug('After dropping gene_features table IF EXISTS.')

    logger.info('Saving gene_features table ...')
    conn = sqlite3.connect(genedb_path)  # open connection

    # save to sqlite3 database
    psql.write_frame(df,                  # pandas dataframe
                     'gene_features',     # table name
                     con=conn,            # connection
                     flavor='sqlite',     # use sqlite
                     if_exists='replace') # drop table if exists
    conn.close()
    logger.info('Finished saving gene_features table.')

def setup_test_data():
    """Uses sample githubarchive data. It saves a few hundred copies as a
    csv, as a python pickle file, as an hdf5 store, as a mysql table and in
    mongodb. If you haven't run timing tests before, you will need to run
    this first.
    """
    print 'use one hour of sample and replicate 100 times'
    # use only the repository data -- onehr_df = ghd.load_local_archive_dataframe()
    onehr_json = ghd.load_local_archive_json()
    one_hr_repo_df = ghd.unnest_git_json(onehr_df)['repository']
    many_hr_repo_df = pn.DataFrame()
    for i in range(1, 100):
        many_hr_repo_df = many_hr_repo_df.append(one_hr_repo_df)
    print('saving dataframe with', many_hr_repo_df.shape, "rows")

    print 'saving data to a csv file'
    many_hr_repo_df.to_csv('data/oneday.csv', encoding='utf-8')

    print 'dumping data to python pickle'
    pickle.dump(many_hr_repo_df, open('data/oneday.pyd', 'wb'))

    print 'dumping data to mysql database'
    con = mysql_setup()
    many_hr_repo_df_clean = many_hr_repo_df.fillna('')
    sql.write_frame(many_hr_repo_df_clean, 'oneday', con, 'mysql')

    print 'saving data to hdf5 filestore'
    store = pyt.HDFStore('data/git.h5')
    store.put('oneday', many_hr_repo_df)

    print 'saving data to mongodb'
    # repos_son = onehr_df['repository']
    many_hr_repo_df = many_hr_repo_df.dropna()
    client = MongoClient()
    dbm = client['git']
    collection = dbm['gittest']
    # many_hr_repo_df = many_hr_repo_df.set_index(many_hr_repo_df.name)
    [collection.insert(onehr_json) for i in range(1, 100)]

def test_keyword_as_column_names(self):
    _skip_if_no_MySQLdb()
    df = DataFrame({'From': np.ones(5)})
    sql.write_frame(df, con=self.db, name='testkeywords',
                    if_exists='replace', flavor='mysql')

def _check_roundtrip(self, frame):
    _skip_if_no_MySQLdb()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unknown table.*")
        cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table", self.db)

    # HACK! Change this once indexes are handled properly.
    result.index = frame.index
    result.index.name = frame.index.name

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    index = Index(lrange(len(frame2))) + 10
    frame2['Idx'] = index
    drop_sql = "DROP TABLE IF EXISTS test_table2"
    cur = self.db.cursor()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unknown table.*")
        cur.execute(drop_sql)
    sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()

    # HACK! Change this once indexes are handled properly.
    expected.index = index
    expected.index.names = result.index.names

    tm.assert_frame_equal(expected, result)

def _check_roundtrip(self, frame):
    _skip_if_no_MySQLdb()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(list(range(len(frame2)))) + 10
    drop_sql = "DROP TABLE IF EXISTS test_table2"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(list(range(len(frame2)))) + 10
    tm.assert_frame_equal(expected, result)

def test_legacy_write_frame(self):
    """Test legacy write frame name.
    Assume that functionality is already tested above so just do
    quick check that it basically works"""
    sql.write_frame(self.test_frame1, 'test_frame_legacy', self.conn,
                    flavor='sqlite')
    self.assertTrue(
        sql.has_table('test_frame_legacy', self.conn, flavor='sqlite'),
        'Table not written to DB')

def save_db(maf_path, db_path, hypermutator_count):
    # merge all data frames together, with the first data frames
    # given priority over later data frames
    df_cols = ['Gene_Symbol', 'Tumor_Sample', 'Tumor_Type', 'Chromosome',
               'Start_Position', 'End_Position', 'Variant_Classification',
               'Reference_Allele', 'Tumor_Allele', 'Protein_Change']
    df = pd.DataFrame(columns=df_cols)
    for single_maf in maf_path.split(','):
        tmp_df = pd.read_csv(single_maf, sep='\t')
        samp_names = set(df['Tumor_Sample'].tolist())
        tmp_df = tmp_df[tmp_df['Tumor_Sample'].apply(lambda x: x not in samp_names)]
        df = pd.concat([df, tmp_df])

    _utils.drop_table('maf_mutation', db_path, kind='sqlite')
    conn = sqlite3.connect(db_path)  # open connection

    # save tsv to sqlite3 database
    psql.write_frame(df,                  # pandas dataframe
                     'maf_mutation',      # table name
                     con=conn,            # connection
                     flavor='sqlite',     # use sqlite
                     if_exists='replace') # drop table if exists

    # filter hypermutator samples
    filter_hypermutators(hypermutator_count, conn, db_path)

def create_tables(masterDbConn, gridLut):
    '''
    Creates each of the tables in the master run database.

    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    gridLut : pandas dataframe
        contains the grid information (point_id, lat, lon, county, etc.)

    Returns
    -------
    Nothing.
    '''

    with masterDbConn:
        # create runParameters table
        sql = ("CREATE TABLE runParameters ("
               "run_id INTEGER PRIMARY KEY, met TEXT, crop TEXT, "
               "resolution REAL, clock_start TEXT, clock_end TEXT, "
               "crit_fr_asw REAL, sow_start TEXT, sow_end TEXT, "
               "harvest_date TEXT, soil_name TEXT)")
        masterDbConn.execute(sql)

        # create apsimOutput table
        # handled in update_apsim_output_table()

        # create outputFields table
        # handled in update_output_fields_table()

        # create gridPoints table
        psql.write_frame(gridLut, 'gridPoints', masterDbConn)

def test_uquery(self):
    try:
        import MySQLdb
    except ImportError:
        raise nose.SkipTest
    frame = tm.makeTimeDataFrame()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
    self.assertEqual(sql.uquery(stmt, con=self.db), 1)

    try:
        sys.stdout = StringIO()

        self.assertRaises(MySQLdb.ProgrammingError, sql.tquery,
                          'insert into blah values (1)', con=self.db)

        self.assertRaises(MySQLdb.ProgrammingError, sql.tquery,
                          'insert into blah values (1)', con=self.db,
                          retry=True)
    finally:
        sys.stdout = sys.__stdout__

def test_tquery(self):
    try:
        import MySQLdb
    except ImportError:
        raise nose.SkipTest
    frame = tm.makeTimeDataFrame()
    drop_sql = "DROP TABLE IF EXISTS test_table"
    cur = self.db.cursor()
    cur.execute(drop_sql)
    sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
    result = sql.tquery("select A from test_table", self.db)
    expected = frame.A
    result = Series(result, frame.index)
    tm.assert_series_equal(result, expected)

    try:
        sys.stdout = StringIO()
        self.assertRaises(MySQLdb.ProgrammingError, sql.tquery,
                          'select * from blah', con=self.db)

        self.assertRaises(MySQLdb.ProgrammingError, sql.tquery,
                          'select * from blah', con=self.db, retry=True)
    finally:
        sys.stdout = sys.__stdout__

def load_company_basic_info():
    # download basic company information, including stock code, P/E ratio and other data
    try:
        rs = ts.get_stock_basics()
        sql.write_frame(rs, "company_basic_info", con=conn_company_classified,
                        flavor='mysql', if_exists='replace', index=True)
        print("company basic info data: ok")
    except:
        print("error loading company basic info data")

def import_to_db(conn, df):
    try:
        psql.write_frame(df, 'locallists', conn, flavor="mysql",
                         if_exists='append', index=None)
        conn.commit()
    finally:
        conn.close()
        print 'mysql done'

def create_bottom_100_all_time(self, bottom_100_all_time):
    """bottom_100_all_time should be a pandas DataFrame that comes from the
    function reviewskimmer.imdb.charts.get_bottom_100_all_time.
    """
    psql.write_frame(bottom_100_all_time, con=self.db,
                     name='rs_bottom_100_all_time',
                     if_exists='replace', flavor='mysql')

def main():
    infile = "./all_players.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='players', if_exists='replace', flavor='mysql')
    cur.close()
    db.close()

def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

def _write_table(tablename, df, conn):
    "writes a dataframe to the sqlite database"
    for col in df.columns:
        if re.search("[() ]", col):
            msg = "please follow SQLite column naming conventions: "
            msg += "http://www.sqlite.org/lang_keywords.html"
            raise Exception(msg)
    write_frame(df, name=tablename, con=conn, flavor='sqlite')

def saveDFToDB(self, results, table_name=None):
    # NOTE: always drop?
    if not table_name:
        table_name = '_'.join(('dataframe',
                               dt.datetime.now().strftime('%d%m%Y')))
    self._log.info('Dropping previous table')
    self.execute('drop table if exists %s' % table_name)
    # self._log.info('Saving results (id:{})'.format(self.monthly_perfs['created']))
    results['date'] = results.index
    pd_sql.write_frame(results, table_name, self._connection)
    return table_name

def update_apsim_output_table(masterDbConn, runPath, update):
    '''
    Updates the apsimOutput table in the master run database. If a run is
    already there it is updated, otherwise it is added.

    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a
        particular run
    update : bool
        if the database needs to be updated or if it is the first commit
        for a particular run

    Returns
    -------
    Nothing.
    '''

    # get the runId
    runId = int(os.path.split(runPath)[1])

    # don't do anything if the database is being updated
    if update:
        print "*** Warning: Run {} data may already exist. Skipping write.".format(runId)
        return

    # get sow start from parameters table
    sql = "SELECT sow_start FROM runParameters WHERE run_id = {}".format(runId)
    sowStart = psql.read_frame(sql, masterDbConn).ix[0][0]

    # check to see if sow date is auto (determined from lookup table)
    if sowStart == 'auto':
        # read sow start for each location
        sql = "SELECT point_id, sow_start FROM gridPoints"
        sowDates = psql.read_frame(sql, masterDbConn, index_col='point_id')
    else:
        # set sow start the same for each location
        sql = "SELECT point_id FROM gridPoints"
        gridPoints = psql.read_frame(sql, masterDbConn)
        sowDates = pandas.DataFrame([sowStart] * len(gridPoints),
                                    index=gridPoints['point_id'])

    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')

    # read and convert to yearly formatted data
    apsimData = _apsim_output(apsimDbPath, sowDates)

    # add column with runId
    runIdSeries = pandas.Series([runId] * len(apsimData))
    apsimData['run_id'] = runIdSeries

    # write runData to master database
    psql.write_frame(apsimData, 'apsimOutput', masterDbConn, if_exists='append')

def main():
    # write_sched_csv()
    infile = "./schedules.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name="schedules", if_exists="replace", flavor="mysql")
    cur.close()
    db.close()

def main():
    field_names = ['game_id', 'plyr_id', 'name', 'is_starter']  # expected CSV columns (unused here)
    infile = "./past_starter_data.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='past_starters', if_exists='replace', flavor='mysql')
    cur.close()
    db.close()

def toDB(con, table, tableName):
    # drop table if it exists
    if psql.table_exists(tableName, con, flavor='sqlite'):
        cur = con.cursor()
        sql = 'DROP TABLE "main"."{}"'.format(tableName)
        cur.execute(sql)
        con.commit()

    # write to db
    psql.write_frame(table, tableName, con)
    con.commit()

def main():
    field_names = ['plyr_id', 'proj_pts', 'week']  # expected CSV columns (unused here)
    infile = "./espn-proj2.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='espn_projections', if_exists='replace', flavor='mysql')
    cur.close()
    del cur
    db.close()
    del db

def main():
    field_names = ['plyr_id', 'name', 'position', 'is_starter']  # expected CSV columns (unused here)
    infile = "./starters_2013.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='current_starters', if_exists='replace', flavor='mysql')
    cur.close()
    del cur
    db.close()
    del db

def main():
    field_names = ['game_id', 'coach_id', 'team', 'is_home', 'year']  # expected CSV columns (unused here)
    infile = "./coach_data.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='coaches', if_exists='replace', flavor='mysql')
    cur.close()
    del cur
    db.close()
    del db

def writeFrameToDB(self, df, SeriesName):
    # write to db
    try:
        self.connect()
        psql.write_frame(df, SeriesName, self.con,
                         if_exists='append', safe_names=False)
        self.con.commit()
        logging.info("Wrote series {}".format(SeriesName))
    except:
        logging.error("Problems with {}".format(SeriesName))
        raise
    finally:
        self.disconnect()

def main():
    field_names = ['plyr_id', 'name', 'position', 'points', 'week']  # expected CSV columns (unused here)
    infile = "./fantasy_full_season.csv"
    db, cur = connect()[:2]  # single DB connection and its cursor
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name='all_predictions', if_exists='replace', flavor='mysql')
    cur.close()
    del cur
    db.close()
    del db

def write_final(dirname, work, final, extract_methods):
    df = extract_data(work)

    if 'csv' in extract_methods:
        csv = os.path.join(final, dirname + ".csv")
        df.to_csv(csv, index=False, header=True)
        print "\tSUCCESS: Extracted data from .out file. CSV written to ./final/%s.csv" % dirname

    if 'sqlite3' in extract_methods:
        db_path = os.path.join(final, "data.db")
        conn = sqlite3.connect(db_path, timeout=10)  # 10 seconds to avoid write deadlock?

        try:
            sqlio.write_frame(df, name='trees_fvsaggregate', con=conn,
                              flavor='sqlite', if_exists='append')
        except sqlite3.IntegrityError as e:
            if e.message.endswith("are not unique"):
                # try to drop and rerun
                cursor = conn.cursor()
                delete_sql = """DELETE FROM trees_fvsaggregate
                                WHERE var = '%(var)s'
                                AND rx = %(rx)d
                                AND cond = %(cond)d
                                AND site = %(site)d
                                AND climate = '%(climate)s' """ % df.irow(0)  # assume the dataframe has the same data

                res = cursor.execute(delete_sql)
                if res.rowcount > 0:
                    print "\tNOTICE : Deleting %d old rows from ./final/data.db" % res.rowcount

                # try again
                sqlio.write_frame(df, name='trees_fvsaggregate', con=conn,
                                  flavor='sqlite', if_exists='append')
            else:
                # something else went wrong
                conn.rollback()
                raise sqlite3.IntegrityError(e.message)

        conn.commit()
        conn.close()
        print "\tSUCCESS: Extracted data from .out file. Row appended to ./final/data.db"

def writeToDb(data, db_conn):
    """Take the list of results and write to sqlite database"""
    data_frame = pd.DataFrame(data)
    data_frame['scrape_time'] = strftime("%Y%m%d%H%M%S", gmtime())
    pdsql.write_frame(data_frame, "bikedata", db_conn,
                      flavor="sqlite", if_exists="append")
    db_conn.commit()

def test_onecolumn_of_integer(self):
    # GH 3628
    # a column_of_integers dataframe should transfer well to sql
    mono_df = DataFrame([1, 2], columns=['c0'])
    sql.write_frame(mono_df, con=self.db, name='mono_df')
    # computing the sum via sql
    con_x = self.db
    the_sum = sum([my_c0[0]
                   for my_c0 in con_x.execute("select * from mono_df")])
    # it should not fail, and gives 3 (Issue #3628)
    self.assertEqual(the_sum, 3)

    result = sql.read_frame("select * from mono_df", con_x)
    tm.assert_frame_equal(result, mono_df)

def importData():
    # start time
    start = datetime(2010, 1, 1)
    end = datetime.date(datetime.now())

    data = DataReader(sp500constituents[0], "yahoo", start, end)

    # exploratory checks (results unused)
    en = enumerate(sp500constituents)
    [i for i, x in en if x == 'WFMI']
    sp500constituents[200:len(sp500constituents)]

    problems = []
    dataImportProblems = []

    for series in sp500constituents[485:len(sp500constituents)]:
        print series
        try:
            data = DataReader(series, "yahoo", start, end)
            data = data.reset_index()
        except:
            print "Can't read {}".format(series)
            dataImportProblems.append(series)
            continue

        con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
        try:
            psql.write_frame(data, series, con)
            con.commit()
        except:
            print "Problems with {}".format(series)
            problems.append(series)
        finally:
            con.close()

    # changing tables to have date formats so RODBC driver recognizes
    # Should check that this is occurring above.
    con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
    for tb in sp500constituents:
        if psql.has_table(tb, con):
            sqltxt = "SELECT * FROM {}".format(tb)
            # print sqltxt
            data = psql.read_frame(sqltxt, con)
            sqlDropTxt = 'DROP TABLE "main"."{}"'.format(tb)
            # print sqlDropTxt
            psql.execute(sqlDropTxt, con)
            con.commit()
            psql.write_frame(data, tb, con)
            con.commit()
    con.close()

def test_tquery(self):
    frame = tm.makeTimeDataFrame()
    sql.write_frame(frame, name="test_table", con=self.db)
    result = sql.tquery("select A from test_table", self.db)
    expected = frame.A
    result = Series(result, frame.index)
    tm.assert_series_equal(result, expected)

    try:
        sys.stdout = StringIO()
        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          "select * from blah", con=self.db)

        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          "select * from blah", con=self.db, retry=True)
    finally:
        sys.stdout = sys.__stdout__

def test_onecolumn_of_integer(self):
    '''
    GH 3628
    a column_of_integers dataframe should transfer well to sql
    '''
    mono_df = DataFrame([1, 2], columns=['c0'])
    sql.write_frame(mono_df, con=self.db, name='mono_df')
    # computing the sum via sql
    con_x = self.db
    the_sum = sum([my_c0[0]
                   for my_c0 in con_x.execute("select * from mono_df")])
    # it should not fail, and gives 3 (Issue #3628)
    self.assertEqual(the_sum, 3)

    result = sql.read_frame("select * from mono_df", con_x)
    tm.assert_frame_equal(result, mono_df)

def addSeriesToUpdateList(self, filename, newType=False, newSource=False):
    '''Adds series to be updated and checks the info is ok.'''
    logging.basicConfig(level=logging.DEBUG)
    self.connect()

    # TODO: maybe add these as data tables to update
    # validation lists
    existing_types = set(self.seriesList['Type'])
    existing_sources = set(self.seriesList['Source'])
    existing_series = set(self.seriesList['SeriesName'])

    thisSeriesList = pd.read_csv(filename)

    # check correct colnames
    if not (self.seriesList.columns == list(thisSeriesList.columns)).all():
        raise ValueError('Columns (names) in import file are incorrect')

    # strip whitespace in table values
    thisSeriesList = thisSeriesList.applymap(lambda x: x.strip())

    # convert Start and End to Datetime
    thisSeriesList[['StartRange', 'EndRange']] = (
        thisSeriesList[['StartRange', 'EndRange']].applymap(pd.to_datetime))

    # append to SeriesList
    for row in thisSeriesList.iterrows():
        row = row[1]  # iterrows yields a tuple whose second element holds the values

        # check type is allowable
        if (row['Type'] not in existing_types) and not newType:
            logging.error('Series {0} has type {1} not in existing types'.format(
                row['SeriesName'], row['Type']))
            continue
        if row['Source'] not in existing_sources and not newSource:
            logging.error('Series {0} has source {1} not in existing sources'.format(
                row['SeriesName'], row['Source']))
            continue
        if row['SeriesName'] in existing_series:
            logging.error('Series {0} is already in existing SeriesNames'.format(
                row['SeriesName']))
            continue

        # if it passes all checks then write to db
        logging.info('Wrote {} to SeriesList'.format(row['SeriesName']))
        row = pd.DataFrame(row).transpose()
        psql.write_frame(row, 'SeriesList', self.con, if_exists='append')
        self.con.commit()

def test_uquery(self):
    frame = tm.makeTimeDataFrame()
    sql.write_frame(frame, name='test_table', con=self.db)
    stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
    self.assertEqual(sql.uquery(stmt, con=self.db), 1)

    try:
        sys.stdout = StringIO()

        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          'insert into blah values (1)', con=self.db)

        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          'insert into blah values (1)', con=self.db,
                          retry=True)
    finally:
        sys.stdout = sys.__stdout__

def processTrackEcho(tuple):
    track = int(tuple[0])
    aid = int(tuple[1])
    con = MySQLdb.connect(host="bigblasta.chiim1n4uxwu.eu-central-1.rds.amazonaws.com",
                          user="******", passwd="Jo27051980", db="bigblasta")
    cursor = con.cursor()

    t0 = datetime.now()
    attribute_dict = echonest_audio(track)
    now = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    attribute_dict['aid'] = aid
    attribute_dict['date'] = now

    t1 = datetime.now()
    ser = Series(attribute_dict)
    df = DataFrame(ser).T
    sql.write_frame(df, con=con, name='echonest', if_exists='append', flavor='mysql')
    t2 = datetime.now()

    dur1 = (t1 - t0).total_seconds()
    dur2 = (t2 - t1).total_seconds()
    print "Echonest response: %f, MySQL: %f" % (dur1, dur2)

def write_values_as_scraperwiki_style_sql(base_dir):
    TABLE_NAME = "value"
    values = get_values_as_dataframe()
    values.replace(to_replace=[float('inf')], value=['na'], inplace=True)
    values['dsID'] = 'fts'
    values['is_number'] = 1
    values['source'] = ''
    values = values.rename(columns={'indicator': 'indID', 'year': 'period'})
    values = values[['dsID', 'region', 'indID', 'period', 'value',
                     'is_number', 'source']]

    filename = os.path.join(base_dir, 'ocha.db')
    sqlite_db = sqlite3.connect(filename)
    sqlite_db.execute("drop table if exists {};".format(TABLE_NAME))
    # values = values.reset_index()
    sql.write_frame(values, TABLE_NAME, sqlite_db)
    print values

def save_settle_data(self, product, start=None):
    """Like it says. If the data file already exists it will bring it up to
    date, otherwise it will begin from 'start', which defaults to 6 months
    ago."""
    self.product = product
    self.reset(product, self._today)
    fsettle = self.DATA + 'settle/' + product.lower() + '.sql'
    conn = sqlite3.connect(fsettle,
                           detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    cur = conn.cursor()
    try:
        cur.execute('SELECT timestamp FROM options AS "[timestamp]" GROUP BY timestamp '
                    'ORDER BY timestamp')
        rows = cur.fetchall()
    except:
        rows = []
    if not rows:
        if start is None:
            start = self._today + relativedelta(months=-6)
    else:
        start = business_day(rows[-1][0], 1, self.cal.holidays)

    while start < self._today:
        if start not in rows:
            print "Saving settle data for " + str(start)
            try:
                futf, optf = self.snap_by_delta(product, start)
                del futf['ticker']
                del futf['last_trade']
                optf['timestamp'] = start
                futf['timestamp'] = start
                optf.reset_index(inplace=True)
                futf.reset_index(inplace=True)
                sql.write_frame(optf, name='options', con=conn, if_exists='append')
                sql.write_frame(futf, name='futures', con=conn, if_exists='append')
            except Exception as e:
                print e
        start = business_day(start, 1, self.cal.holidays)
    conn.close()

def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK!
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(range(len(frame2))) + 10
    sql.write_frame(frame2, name='test_table2', con=self.db)
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(range(len(frame2))) + 10
    tm.assert_frame_equal(expected, result)

def snap(self, product):
    """Snap live data and bring settlement data up to date."""
    self.save_settle_data(product)  # settlement data up-to-date. N.B. also calls reset()
    dnow = datetime.datetime.now()
    flive = self.DATA + 'live/' + product.lower() + '.sql'
    conn = sqlite3.connect(flive)
    cur = conn.cursor()
    futf, optf = self.snap_by_delta(product, dnow)
    del futf['ticker']
    del futf['last_trade']
    optf['timestamp'] = dnow
    futf['timestamp'] = dnow
    optf.reset_index(inplace=True)
    futf.reset_index(inplace=True)
    sql.write_frame(optf, name='options', con=conn, if_exists='append')
    sql.write_frame(futf, name='futures', con=conn, if_exists='append')
    conn.close()

def save2db(self, items='all'):
    """Save project info to database

    Args:
        items: 'jobs', 'results' and 'runsummary' respectively save jobs,
            results or run summary to the database; 'all' saves everything
    """
    db_abspath = os.path.join(self.resultsdir_abspath, self.db_name)
    cnx = sqlite3.connect(db_abspath)
    if items == 'all' or items == 'jobs':
        sql.write_frame(self.jobs_df, name='Jobs', con=cnx, if_exists='append')
    if items == 'all' or items == 'results':
        sql.write_frame(self.results_df, name='Results', con=cnx, if_exists='append')
    if items == 'all' or items == 'runsummary':
        sql.write_frame(self.runsum_df, name='RunSummary', con=cnx, if_exists='append')
    cnx.close()

def _check_roundtrip(self, frame):
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.read_frame("select * from test_table", self.db)

    # HACK! Change this once indexes are handled properly.
    result.index = frame.index

    expected = frame
    tm.assert_frame_equal(result, expected)

    frame['txt'] = ['a'] * len(frame)
    frame2 = frame.copy()
    frame2['Idx'] = Index(lrange(len(frame2))) + 10
    sql.write_frame(frame2, name='test_table2', con=self.db)
    result = sql.read_frame("select * from test_table2", self.db,
                            index_col='Idx')
    expected = frame.copy()
    expected.index = Index(lrange(len(frame2))) + 10
    expected.index.name = 'Idx'
    print(expected.index.names)
    print(result.index.names)
    tm.assert_frame_equal(expected, result)