def _create_empty_table(full_table_name):
    """Creates an empty table based on a DataFrame."""
    # df, partitioned_by, print_query, and engine come from the enclosing scope.
    # Set create table string
    create_str = f'CREATE TABLE {full_table_name}'

    # Specify column names and data types
    create_col_list = _get_create_col_list(df, partitioned_by)
    partition_col_list = _get_partition_col_list(df, partitioned_by)

    sep_str = ',\n '
    create_col_str = sep_str.join(create_col_list)
    partition_col_str = sep_str.join(partition_col_list)

    if len(partition_col_list) > 0:
        create_table_str = ('{create_str} (\n'
                            ' {create_col_str}\n'
                            ')\n'
                            ' PARTITIONED BY (\n'
                            ' {partition_col_str}\n'
                            ');').format(**locals())
    else:
        create_table_str = ('{create_str} (\n'
                            ' {create_col_str}\n'
                            ');').format(**locals())

    if print_query:
        print(create_table_str)

    # Create the table with no rows
    psql.execute(create_table_str, engine)

    return create_col_list, partition_col_list
def createDB():
    url = urlparse('mysql://*****:*****@127.0.0.1:13306/shop_database')  # for Ops
    # url = urlparse('mysql+pymysql://stock@localhost:3306/stockdb')  # for Dev
    global conn
    conn = mysql.connector.connect(
        host=url.hostname,
        port=url.port,
        user=url.username,
        database=url.path[1:],
        password=url.password)

    ############################################################
    # Create table
    ############################################################
    sql = "CREATE TABLE IF NOT EXISTS shop_info (" \
          "url varchar(255) NOT NULL," \
          "title varchar(128) NOT NULL," \
          "description varchar(255)," \
          "address varchar(255)," \
          "lat double," \
          "lng double," \
          "first_entry datetime NOT NULL," \
          "last_update datetime NOT NULL," \
          "shop_group varchar(64) NOT NULL," \
          "PRIMARY KEY(title, url)" \
          ")"
    psql.execute(sql, conn)
    conn.commit()
def _add_rows_to_table(sub_df, partition_dict=None):
    """Adds a subset of rows to a SQL table from a DataFrame.

    The purpose of this is to do it in batches for quicker insert time.
    """
    # full_table_name, print_query, and engine come from the enclosing scope.
    # FIXME: Incorporate inserting rows with partitions in batches
    insert_str = f'INSERT INTO {full_table_name}\n'

    # VALUES Clause
    # Each entry represents a row of the DataFrame that is being
    # inserted into the table
    values_list = [
        _create_row_insert_sql(sub_df.iloc[i], partition_dict)
        for i in range(len(sub_df))
    ]
    values_str = 'VALUES\n{}'.format(',\n'.join(values_list))

    # Add PARTITION Clause if specified
    if partition_dict is not None:
        partition_vals_str = _get_partition_vals_str(partition_dict)
        partition_str = f'PARTITION {partition_vals_str}\n'
        insert_values_str = f'{insert_str}{partition_str}{values_str};'
    else:
        insert_values_str = f'{insert_str}{values_str};'

    if print_query:
        print(insert_values_str)

    psql.execute(insert_values_str, engine)
def clear_schema(engine):
    inspector = inspect(engine)
    available_table = inspector.get_table_names(schema='public')
    ignore_table = [
        'migrate_version', 'question', 'survey', 'form', 'question_group',
        'answer', 'survey_instance', 'sync', 'spatial_ref_sys'
    ]
    delete_table = list(
        filter(lambda x: x not in ignore_table, available_table))
    view_list = inspector.get_view_names(schema='public')
    new_views = []
    for view_name in view_list:
        default_views = [
            'geography_columns', 'geometry_columns', 'raster_columns',
            'raster_overviews'
        ]
        if view_name not in default_views:
            view_definition = inspector.get_view_definition(view_name,
                                                            schema='public')
            view_definition = ('CREATE OR REPLACE VIEW ' + view_name +
                               ' AS ' + view_definition)
            new_views.append(view_definition)
            sql.execute('DROP VIEW IF EXISTS ' + view_name, engine)
    for tbl in delete_table:
        sql.execute('DROP TABLE IF EXISTS "{}"'.format(tbl), engine)
    return new_views
def unlock_form(project_name, arm_name, event_descrip, form_name, engine):
    """
    Unlock a given form by removing records from the locking table

    :param project_name: str
    :param arm_name: str
    :param event_descrip: str
    :param form_name: str
    :param engine: `sqlalchemy.Engine`
    :return: None
    """
    # get ids needed for unlocking
    project_id = get_project_id(project_name, engine)
    arm_id = get_arm_id(arm_name, project_id, engine)
    event_id = get_event_id(event_descrip, arm_id, engine)

    # get a list of all the locked records and filter for records to remove
    locked_records = pd.read_sql_table('redcap_locking_data', engine)
    locked_forms = locked_records[(locked_records.project_id == project_id) &
                                  (locked_records.event_id == event_id) &
                                  (locked_records.form_name == form_name)]

    # generate the list of ids to drop and remove from db table
    locked_list = ', '.join([str(i) for i in locked_forms.ld_id.values.tolist()])
    if locked_list:
        sql = 'DELETE FROM redcap_locking_data ' \
              'WHERE redcap_locking_data.ld_id IN ({0});'.format(locked_list)
        execute(sql, engine)
def insert_rating(self, user_id, movie_id, rating_value):
    sql.execute(
        "INSERT INTO rating VALUES ({user}, {item}, {rating}, '{timestamp}')".format(
            user=user_id,
            item=movie_id,
            rating=rating_value,
            timestamp=int(str(datetime.timestamp(datetime.now()))[:10]),
        ),
        create_engine(self.conn_string))
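# A minimal sketch (not part of the original code) of the same insert written as a
# parameterized query, so the values are quoted by the driver instead of by string
# formatting. It assumes the same `rating` table layout and `self.conn_string`
# attribute as above; the %s placeholder style assumes a MySQL/PostgreSQL-style driver.
def insert_rating_parameterized(self, user_id, movie_id, rating_value):
    sql.execute(
        "INSERT INTO rating VALUES (%s, %s, %s, %s)",
        create_engine(self.conn_string),
        params=[(user_id, movie_id, rating_value,
                 int(datetime.now().timestamp()))])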
def prepecosystem():
    if sys.platform == 'linux':
        Base = os.path.expanduser('~') + '/VKHCG'
    else:
        Base = 'C:/VKHCG'
    ############################################################
    sFileDir = Base + '/77-Yoke'
    if not os.path.exists(sFileDir):
        os.makedirs(sFileDir)
    ############################################################
    sFileDir = Base + '/77-Yoke/10-Master'
    if not os.path.exists(sFileDir):
        os.makedirs(sFileDir)
    ############################################################
    sFileDir = Base + '/77-Yoke/20-Slave'
    if not os.path.exists(sFileDir):
        os.makedirs(sFileDir)
    ############################################################
    sFileDir = Base + '/77-Yoke/99-SQLite'
    if not os.path.exists(sFileDir):
        os.makedirs(sFileDir)
    ############################################################
    sDatabaseName = Base + '/77-Yoke/99-SQLite/Yoke.db'
    conn = sq.connect(sDatabaseName)
    print('Connecting :', sDatabaseName)
    sSQL = 'CREATE TABLE IF NOT EXISTS YokeData (\
        PathFileName VARCHAR (1000) NOT NULL\
        );'
    sql.execute(sSQL, conn)
    conn.commit()
    conn.close()
    return Base, sDatabaseName
def stockscreener_calendar_doclean(CAL):
    calToHolidays = {'FR': ['14/07', '15/08', '01/05', '26/12'],
                     'US': ['04/07'],
                     'UK': ['26/12'],
                     'JP': ['02/01', '03/01', '31/12', '23/12']}
    c = conn.cursor()
    holi = pds.read_sql("SELECT date FROM stockscreener_calendar WHERE (CDR=%s) ORDER BY date ASC",
                        conn, index_col='date', params=(CAL, ))
    holi = pds.to_datetime(holi.index)

    # add N0 to N+1 recursive holiday
    recursiveHolidays = []
    for iYear in range(min(holi).year, datetime.datetime.utcnow().year + 2):
        if CAL in calToHolidays:
            for hol in calToHolidays[CAL]:
                iDay = int(hol.split('/')[0])
                iMonth = int(hol.split('/')[1])
                recursiveHolidays.append(datetime.date(iYear, iMonth, iDay))
        recursiveHolidays.append(datetime.date(iYear, 1, 1))
        recursiveHolidays.append(datetime.date(iYear, 12, 25))
    recursiveHolidays = pds.to_datetime(recursiveHolidays)

    merge = recursiveHolidays.union(holi).drop_duplicates(keep='first')
    merge = pds.to_datetime(merge)
    df = pds.DataFrame({'date': merge, 'cdr': CAL})
    df = df.set_index('date')

    query = '''DELETE FROM stockscreener_calendar WHERE (CDR='%s')''' % CAL
    sql.execute(query, engine)
    df.to_sql('stockscreener_calendar', engine, if_exists='append', index=True)
    logging.info('stockscreener_calendar cleaned! %s', CAL)
    c.close()
def importBankTransactions():
    ### imports old transactions, Emma/Dan mint transactions, appends together and inserts into database
    start = time.time()
    if demo:  ## only used to demo account to add dummy data. Is turned on above.
        df = pd.read_csv('DemoData/demotransactions.csv', parse_dates=['transdate'])
    else:
        df = pd.read_csv('CSVs/oldtransactions.csv', parse_dates=['transdate'])
        df_accrual = pd.read_csv('CSVs/accrual.csv', parse_dates=['transdate'])
        df = df.append(df_accrual)
    new = time.time()
    start = new
    df = df.append(mintImport())
    new = time.time()
    start = new
    df = df.append(stockPricesImport.stockincome())
    new = time.time()
    start = new
    df = df.sort('transdate')
    df.to_sql('transactions', engine, if_exists='replace', index=False,
              dtype={'accountname': sqlalchemy.types.VARCHAR(length=30)})
    sql.execute("CREATE INDEX transactions_transdate_index ON money.transactions (transdate);", engine)
    sql.execute("CREATE INDEX transactions_accountname_index ON money.transactions (accountname);", engine)
def test_execute_fail(self):
    _skip_if_no_MySQLdb()
    drop_sql = "DROP TABLE IF EXISTS test"
    create_sql = """
    CREATE TABLE test
    (
    a TEXT,
    b TEXT,
    c REAL,
    PRIMARY KEY (a(5), b(5))
    );
    """
    cur = self.db.cursor()
    cur.execute(drop_sql)
    cur.execute(create_sql)

    sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db)
    sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db)

    try:
        sys.stdout = StringIO()
        self.assertRaises(Exception, sql.execute,
                          'INSERT INTO test VALUES("foo", "bar", 7)',
                          self.db)
    finally:
        sys.stdout = sys.__stdout__
def aux(f, conn, schema, return_dtype, arg_names):
    try:
        dtypes = f.__doc__.split(',')
    except AttributeError:
        raise Exception("You must supply argument types in the docstring")
    arg_def = arg_parser(arg_names, dtypes)
    lines = inspect.getsourcelines(f)[0][3:]
    fxn_code = ''.join(lines)
    fxn_name = f.__name__
    params = {
        'schema': schema,
        'fxn_name': f.__name__,
        'arg_def': arg_def,
        'return_type': return_dtype,  # fixed: the parameter is named return_dtype
        'fxn_code': fxn_code
    }
    sql = '''
    DROP FUNCTION IF EXISTS {schema}.{fxn_name} ({arg_def});
    CREATE OR REPLACE FUNCTION {schema}.{fxn_name} ({arg_def})
    RETURNS {return_type}
    AS $$
    {fxn_code}
    $$ LANGUAGE plpythonu;
    '''.format(**params)
    psql.execute(sql, conn)
    print "Successfully created function: {schema}.{fxn_name}({arg_def})".format(**params)
def unlock_form(project_name, arm_name, event_descrip, form_name, engine,
                subject_id=None):
    """
    Unlock a given form by removing records from the locking table

    :param project_name: str
    :param arm_name: str
    :param event_descrip: str
    :param form_name: str
    :param engine: `sqlalchemy.Engine`
    :param subject_id: str
    :return: bool (True if any locked records were removed)
    """
    # get ids needed for unlocking
    project_id = get_project_id(project_name, engine)
    arm_id = get_arm_id(arm_name, project_id, engine)
    event_id = get_event_id(event_descrip, arm_id, engine)

    # get a list of all the locked records and filter for records to remove
    locked_records = pd.read_sql_table('redcap_locking_data', engine)
    locked_forms = locked_records[(locked_records.project_id == project_id) &
                                  (locked_records.event_id == event_id) &
                                  (locked_records.form_name == form_name)]
    if subject_id:
        locked_forms = locked_forms[(locked_forms.record == subject_id)]

    # generate the list of ids to drop and remove from db table
    global locked_list
    locked_list = ', '.join([str(i) for i in locked_forms.ld_id.values.tolist()])
    if locked_list:
        sql = 'DELETE FROM redcap_locking_data ' \
              'WHERE redcap_locking_data.ld_id IN ({0});'.format(locked_list)
        execute(sql, engine)
        return True
    else:
        return False
def totalbalances():
    ### inserts daily balance data for all accounts to database
    from datetime import datetime
    a = sqlqueries.sqltotalbalances()  ### bankaccounts, transactions, dates, fxrates
    df = pd.read_sql(a, engine, parse_dates='transdate')
    df['amount'] = df['amount'].fillna(0)
    df['balance'] = np.cumsum(df.groupby(['AccountName'])['amount'])  # adds column of running total balances
    df = df[df['balance'] != 0]  # removes zero balances which should be balances before account started
    df = df.sort('transdate')
    df = df[df['transdate'] <= datetime.today()]  # removes any future dates
    #df['USDAmount'] = df.apply(lambda row: convert(row['balance'], row['Currency'], row['Rate']), axis=1)
    df['USDAmount'] = df.balance / df.Rate
    df.ix[df.Currency == 'USD', 'USDAmount'] = df.ix[df.Currency == 'USD', 'balance']
    df['CADAmount'] = df.USDAmount * df.Rate
    df.balance = df.balance.round(2)
    df.USDAmount = df.USDAmount.round(2)
    df.CADAmount = df.CADAmount.round(2)
    df.to_sql('balances', engine, if_exists='replace', index=False)
    sql.execute("CREATE INDEX balances_transdate_index ON money.balances (transdate);", engine)
def insertSleepDataToCache(self, df_sleep_data):
    """
    Store the calculated sleep activities in the server.

    @type df_sleep_data: dataframe
    @param df_sleep_data: the dataframe containing data to be stored.
    """
    #start_time = dt.datetime.now()
    if not df_sleep_data.empty:
        mysql_cn = pymysql.connect(host=self.DB_HOST, port=self.DB_PORT,
                                   user=self.DB_USERNAME, passwd=self.DB_PASSWORD,
                                   db=self.DB_NAME)
        for _, row in df_sleep_data.iterrows():
            sql_query = "INSERT INTO " + self.DB_TABLE_CACHE \
                + " (date, start_datetime, end_datetime,"\
                + " count_micro_awakenings, time_micro_awakenings, "\
                + " count_awakenings, time_awakenings) VALUES" \
                + " ('" + row['date'].strftime('%Y-%m-%d') \
                + "', '" \
                + row['start_datetime'].strftime('%Y-%m-%d %H:%M:%S') + "', '"\
                + row['end_datetime'].strftime('%Y-%m-%d %H:%M:%S') + "', '" \
                + str(row['count_micro_awakenings']) + "', '"\
                + str(row['time_micro_awakenings']) + "', '"\
                + str(row['count_awakenings']) + "', '"\
                + str(row['time_awakenings']) + "');"
            sql.execute(sql_query, mysql_cn)
        mysql_cn.commit()
        mysql_cn.close()
def connect_to_mysql_db_prod(sect, parameter_dict={}, DisplayPath=False, EchoSQL=False):
    # cnxdict = get_cnxdict(sect)
    cnxdict = read_db_config(sect)
    cnxdict['cnx'] = db.connect(host=cnxdict['host'],
                                user=cnxdict['user'],
                                passwd=cnxdict['password'])
    cnxdict['db'] = db.connect(host=cnxdict['host'],
                               user=cnxdict['user'],
                               passwd=cnxdict['password'],
                               db=cnxdict['schema'])
    sql.execute("USE {}".format(cnxdict['schema']), cnxdict['db'])
    sql.execute("USE {}".format(cnxdict['schema']), cnxdict['cnx'])
    cnxdict['crs'] = cnxdict['db'].cursor()
    cnxdict['out_filepath'] = buildfilepath(cnxdict, DisplayPath=DisplayPath)
    cnxdict['out_csvpath'] = buildfilepath(cnxdict, DisplayPath=DisplayPath, fileext='csv')
    """
    Currently I am adding this parameter to the root, but I think it would make
    sense to make this more dynamic and have a parameter dictionary that
    contains these individual parameters.
    """
    for itemname in parameter_dict.keys():
        cnxdict[itemname] = parameter_dict[itemname]
    return cnxdict
def __binarizeInParallel__(conn, table_name, output_table, cols, col_types_dict,
                           col_distinct_vals_dict, label):
    '''
    Transform the categorical columns into a collection of binary valued columns
    and insert rows into this table in parallel using a PL/Python function

    Inputs:
    =======
    conn : A DBConnect object
    table_name : (string) Name of input table
    output_table : (string) Name of output table
    cols: (list) list of independent feature column names
    col_types_dict : (dict) a dict of column names and types
    col_distinct_vals_dict : (dict) a dict of column name, and the set of all
                             distinct values in the column
    label : (string) label column name. If empty, it will be ignored.

    Outputs:
    ========
    A new table is created with the rows of the original table transformed
    '''
    pinsert_func = PARALLEL_INSERT_FUNC.format(table_name=table_name,
                                               output_table=output_table)
    psql.execute(pinsert_func, conn.getConnection())
    pinsert_stmt = PARALLEL_INSERT_QUERY.format(
        output_table_name=output_table,
        table_name=table_name,
        cols=GP_STRING_IDENTIFIER.format(string_to_encode=pickle.dumps(cols)),
        col_types_dict=GP_STRING_IDENTIFIER.format(
            string_to_encode=pickle.dumps(col_types_dict)),
        col_distinct_vals_dict=GP_STRING_IDENTIFIER.format(
            string_to_encode=pickle.dumps(col_distinct_vals_dict)),
        label_col=label)
    psql.execute(pinsert_stmt, conn.getConnection())
def drop_table(self, table_name):
    """
    :param table_name: string type, db_name.table_name
    :return:
    """
    drop_sql = 'drop table %s' % table_name
    sql.execute(drop_sql, self.engine)
def readJson():
    global conn
    now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    with open(JSONFILE) as f:
        df = json.load(f)
        for shop_info in df:
            print(shop_info)
            if 'address' not in shop_info:
                shop_info['address'] = ""
            keys = ",".join([
                'url', 'title', 'description', 'address', 'lat', 'lng',
                'first_entry', 'last_update', 'shop_group'
            ])
            values = '"%s","%s","%s", "%s", %f, %f, "%s", "%s", "%s"' % (
                shop_info['url'], shop_info['title'], shop_info['desc'],
                shop_info['address'], shop_info['lat'], shop_info['lng'],
                now, now, shop_info['group'])
            sql = "INSERT INTO {} ({}) VALUES ({});".format(
                "shop_info", keys, values)
            print(sql)
            psql.execute(sql, conn)
    conn.commit()
def createOutputTable(self, tablehead):
    from pandas.io import sql
    i = 0
    while i < len(time):
        print("...start creating", tablehead + time[i])
        #table = str(tablehead + time[i])
        sql.execute(
            "create table " + tablehead + time[i] + " \
            (\
            rolename VARCHAR2(200),\
            mon DATE,\
            wintype VARCHAR2(40),\
            qt NUMBER,\
            amount NUMBER,\
            pmf FLOAT,\
            sf FLOAT,\
            ql VARCHAR2(10)\
            )\
            tablespace BIP_TBS\
            pctfree 10\
            initrans 1\
            maxtrans 255\
            storage\
            (\
            initial 80K\
            next 1M\
            minextents 1\
            maxextents unlimited\
            )", engine)
        i = i + 1
def drop_table(self, table):
    """
    :param table: string type, db_name.table_name
    :return:
    """
    drop_sql = 'drop table %s' % table
    sql.execute(drop_sql, self.engine)  # fixed: previously executed an undefined delete_sql
def clear_schema(schema_name, conn, print_query=False):
    """
    Remove all tables in a given schema.

    Inputs:
    schema_name - Name of the schema in SQL
    conn - A psycopg2 connection object
    print_query - If True, print the resulting query.
    """
    sql = '''
    SELECT table_name
      FROM information_schema.tables
     WHERE table_schema = '{}'
    '''.format(schema_name)

    if print_query:
        print sql

    table_names = psql.read_sql(sql, conn).table_name
    for name in table_names:
        del_sql = 'DROP TABLE IF EXISTS {schema_name}.{table_name};'.format(
            schema_name=schema_name, table_name=name)
        psql.execute(del_sql, conn)
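# A minimal usage sketch (not part of the original code); the schema name and the
# psycopg2 connection parameters below are made up for illustration.
import psycopg2

conn = psycopg2.connect(dbname='analytics', user='analyst')
clear_schema('staging', conn, print_query=True)  # drops every table in the staging schema
conn.commit()
conn.close()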
def fximport():
    if not demo:
        FXImport.fximport()
    df = pd.read_csv('Common/FX rates.csv', parse_dates=['FXDate'])
    df.to_sql('fxrates', engine, if_exists='replace')
    sql.execute("CREATE INDEX FX_FXdate_index ON money.fxrates (FXDate);", engine)
def delete_data(self, table, condition=None):
    """
    :param table: string type, db_name.table_name
    :param condition: string type, like 'field_value>50'
    :return:
    """
    delete_sql = form_sql(table_name=table, oper_type='delete', where_condition=condition)
    sql.execute(delete_sql, self.engine)
def actRegistro(PosEngine, nroTarjetaExterno, codigoTrxTarjeta, procesado, paradaDestino, dia):
    try:
        #print ("UPDATE THE_VIAJESMAYO SET PROCESADO = '%s', PARADADESTIN = %i WHERE NROTARJETAEXTERNO = '%s' AND CODIGOTRXTARJETA = %i" %(procesado, paradaDestino, nroTarjetaExterno, codigoTrxTarjeta))
        sql.execute("UPDATE THE_VIAJESMAYO_CRUCE%s SET PROCESADO = '%s', PARADADESTIN = %i WHERE NROTARJETAEXTERNO = '%s' AND CODIGOTRXTARJETA = %i"
                    % (dia, procesado, paradaDestino, nroTarjetaExterno, codigoTrxTarjeta), PosEngine)
        return 1
    except:
        print 'Error al actualizar'
        return 0
def __svmDemoCleanup__(conn):
    '''
    Clean-up any tables that were created
    '''
    psql.execute('drop table if exists svm_model cascade ;', conn.getConnection())
    psql.execute('drop table if exists svm_model_param cascade ;', conn.getConnection())
def __createPivotTable__(conn, output_table, col_types_dict, col_names_and_types_lst, label): """ Create a Pivot table, where every categorical column in the original table has been expanded into n columns, where n is the number of distinct values in the column Inputs: ======= conn : DBConnect object output_table : (string) name of the pivot table (output) col_types_dict : (dict) a dict of column names and types col_names_and_types_lst : (list) a list of column names and types, where any categorical column in the original table have label : (string) name of the label column (if it is an empty string, it will be ignored) Outputs: ======== A Pivot table is created. """ cnames_and_types = ", ".join([" ".join(pair) for pair in col_names_and_types_lst]) stmt = """ """ data_dict = {} data_dict["output_table"] = output_table data_dict["col_names_and_types"] = cnames_and_types if col_types_dict.has_key("id") and label: stmt = """ drop table if exists {output_table} cascade; create table {output_table} ({id_col} {id_col_type}, {col_names_and_types}, {label_col_name} {label_col_type} ); """ data_dict["id_col"] = "id" data_dict["id_col_type"] = col_types_dict["id"] data_dict["label_col_name"] = label data_dict["label_col_type"] = col_types_dict[label] elif col_types_dict.has_key("id"): # ID column exists, but there is no label column specified stmt = """ drop table if exists {output_table} cascade; create table {output_table} ({id_col} {id_col_type}, {col_names_and_types} ); """ data_dict["id_col"] = "id" data_dict["id_col_type"] = col_types_dict["id"] else: # Neither ID column nor label column exists (i.e there only are features in the table) stmt = """ drop table if exists {output_table} cascade; create table {output_table} ( {col_names_and_types} ); """ stmt = stmt.format(**data_dict) psql.execute(stmt, conn.getConnection()) conn.conn.commit()
def delete_tables(datatables, engine):
    """
    Delete the tables which we have dataframes for

    :param datatables: Dictionary of dataframes
    :param engine: Engine to be deleted from
    :return:
    """
    for key in datatables:
        sql.execute('DROP TABLE IF EXISTS "%s"' % key, engine)
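# A minimal usage sketch (not part of the original code); the table names,
# DataFrames, and connection string are made up for illustration.
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///example.db')
datatables = {'staging_orders': pd.DataFrame(), 'staging_users': pd.DataFrame()}
delete_tables(datatables, engine)  # drops staging_orders and staging_users if they exist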
def update_to_finished(self):
    now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    where = 'cc="%s" AND target_feature="%s" AND num_of_neuron=%d AND num_train=%d AND rnn="%s" AND status="running"' % (
        self.cc, self.target_feature, self.num_of_neuron, self.num_train, self.rnn
    )
    sql = 'UPDATE %s SET status="finished", finished_at="%s" WHERE %s' % (self.table_name, now, where)
    print(sql)
    psql.execute(sql, self.conn)
    self.conn.commit()
def insert_to_queue(self):
    now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    keys = ",".join(['cc', 'target_feature', 'num_of_neuron', 'num_train',
                     'rnn', 'status', 'queued_at'])
    values = '"%s","%s",%d, %d,"%s","%s", "%s"' % (
        ','.join(self.cc), self.target_feature, self.num_of_neuron,
        self.num_train, self.rnn, 'waiting', now
    )
    sql = "INSERT INTO {} ({}) VALUES ({});".format(self.table_name, keys, values)
    psql.execute(sql, self.conn)
    self.conn.commit()
def insert_into_table(locations):
    query = """ INSERT INTO TBLelevation (latitude, longitude, elevation) VALUES """
    for i in locations:
        query += "(" + str(i[0]) + ", " + str(i[1]) + ", " + str(i[2]) + "),"
    query = query[:-1]
    print query
    sql.execute(query, engine)
def importStockTransactions():
    if demo:  ## only used to demo account to add dummy data. Is turned on above.
        df = pd.read_csv('DemoData/StockTransactions.csv', parse_dates=['transdate'])
    else:
        df = pd.read_csv('CSVs/StockTransactions.csv', parse_dates=['transdate'])
    df.to_sql('stocktransactions', engine, if_exists='replace', index=False,
              dtype={'symbol': sqlalchemy.types.VARCHAR(length=20)})
    sql.execute("CREATE INDEX Stocktransaction_transdate_index ON money.stocktransactions (transdate);", engine)
    sql.execute("CREATE INDEX Stocktransaction_symbol_index ON money.stocktransactions (symbol);", engine)
def insert_data(self, table, fields, data):
    """
    :param table: string type, db_name.table_name
    :param fields: string type, like 'id,type,value'
    :param data: list type of list value, like: data=[['李5'],['黄9']]
    :return:
    """
    fields = '(' + fields + ')'
    insert_sql = form_sql(table_name=table, oper_type='insert', insert_field=fields)
    sql.execute(insert_sql, self.engine, params=data)
def import_label_image_sub(path, imgid, ltid, engine):
    res = sql.execute('select MAX(ID) from Labels', engine)
    iid = res.first()[0]
    iid = 0 if iid is None else iid + 1
    res.close()
    sql.execute(
        'INSERT INTO Labels (ID, Image, IID, LTID) VALUES (?, ?, ?, ?)',
        engine, params=[(iid, str(path), imgid, ltid)])
    return iid
def engine_init():
    engine = create_engine(
        'postgresql://*****:*****@localhost:5432/postgres')
    if not engine.dialect.has_schema(engine, 'book_club'):
        sql.execute('CREATE SCHEMA IF NOT EXISTS book_club', engine)
    else:
        sql.execute('DROP TABLE IF EXISTS book_club.books', engine)
    return engine
def put_simulation_result_sql(cc, target_feature, num_of_neuron, rnn, stats, iter=-1):
    url = urlparse('mysql+pymysql://stockdb:[email protected]:3306/stockdb')  # for Ops
    # url = urlparse('mysql+pymysql://stock@localhost:3306/stockdb')  # for Dev

    stats['cc'] = target_feature[0][0:4]
    stats['target_feature'] = "X".join(target_feature)
    stats['companion'] = ','.join(cc)
    stats['num_of_neuron'] = num_of_neuron
    stats['training_iter'] = iter
    stats['rnn'] = rnn
    stats['datetime'] = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")

    # Replace NaN with None for SQL.
    stats = stats.where((pd.notnull(stats)), None)

    table_name = "simulation_stats"
    conn = mysql.connector.connect(
        host=url.hostname,
        port=url.port,
        user=url.username,
        database=url.path[1:],
        password=url.password
    )

    # Check whether a row for this configuration already exists
    sql = 'SELECT COUNT(*) FROM %s WHERE cc="%s" ' \
          'AND target_feature="%s" ' \
          'AND companion="%s" ' \
          'AND num_of_neuron=%d ' \
          'AND training_iter=%d ' \
          'AND rnn="%s";' % (table_name, stats['cc'], stats['target_feature'],
                             stats['companion'], stats['num_of_neuron'],
                             stats['training_iter'], stats['rnn'])
    result_mysql = psql.execute(sql, conn)

    # if result_mysql.fetchone()[0] != 0:
    # If a row already exists, delete it first
    sql = 'DELETE FROM %s WHERE cc="%s" AND target_feature="%s" AND companion="%s" AND num_of_neuron=%d AND ' \
          'training_iter=%d AND rnn="%s";' % (table_name, stats['cc'], stats['target_feature'],
                                              stats['companion'], stats['num_of_neuron'],
                                              stats['training_iter'], stats['rnn'])
    psql.execute(sql, conn)

    # keys = ",".join(stats.index)
    keys = ",".join(['count_all', 'count_buy', 'mean_buy', 'mean_sell', 'mean_gain',
                     'mean_buy_ratio', 'std_gain', 'mean_gain_r', 'std_gain_r', 'cc',
                     'target_feature', 'companion', 'num_of_neuron', 'training_iter',
                     'rnn', 'datetime'])
    values = '"{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}"'.format(
        stats['count_all'], stats['count_buy'], stats['mean_buy'], stats['mean_sell'],
        stats['mean_gain'], stats['mean_buy_ratio'], stats['std_gain'], stats['mean_gain_r'],
        stats['std_gain_r'], stats['cc'], stats['target_feature'], stats['companion'],
        stats['num_of_neuron'], stats['training_iter'], stats['rnn'], stats['datetime'])
    sql = "INSERT INTO {} ({}) VALUES ({});".format(table_name, keys, values)
    psql.execute(sql, conn)

    conn.commit()
    conn.close()
def get_name_score(data_folder, table_data_commom, i, to_mysql=True):
    # Load preprocessing parameters and model
    parameter_path = data_folder + '\\preprocess_parameter.pkl'
    preprocess_parameter = joblib.load(parameter_path)
    model_name = 'GradientBoostingRegressor'
    model_path = data_folder + '\\%s.pkl' % model_name
    regs = joblib.load(model_path)

    # Missing values
    table_data_commom['exist_days'] = table_data_commom['exist_days'].fillna(
        preprocess_parameter['exist_days_mean'])
    table_data_commom = table_data_commom.fillna(0)

    # Normalization
    table_data_commom['exist_days'] = table_data_commom['exist_days'].apply(
        lambda x: ml_train_test.MinMax(x, preprocess_parameter['exist_days_min'],
                                       preprocess_parameter['exist_days_max']))

    # Data for model prediction
    company_name = table_data_commom['company_name']
    model_data = table_data_commom.copy()
    for col in ['company_name', 'chanle_id']:
        if col in model_data.columns:
            model_data = model_data.drop(col, axis=1)
    model_data = model_data.astype(float)

    # Model prediction (uses the prepared model_data rather than re-dropping columns)
    company_score = regs.predict(model_data)
    company_score = pd.Series(company_score, name='company_score')
    company_name_score = pd.DataFrame([company_name, company_score]).T

    # Save results to MySQL and CSV
    db_name = 'fdm_3_mysql'
    table_name = 'company_name_score'
    save_filename = os.path.join(data_folder, '%s.csv' % table_name)

    # MySQL
    if to_mysql:
        engine = data_IO.mysql_engine(db_name)
        if i == 1:
            sql.execute('drop table if exists %s' % table_name, engine)
        sql.to_sql(company_name_score, 'company_name_score', engine,
                   schema=db_name, if_exists='append')

    # CSV
    if i == 1:
        if os.path.exists(save_filename):
            os.remove(save_filename)
    company_name_score.to_csv(save_filename, index=False, mode='a', encoding='utf-8')  # append data
def importDatesTable():
    ### creates table of dates for all dates from date specified until today + 400 days
    from helperfunctions import table_of_dates
    tableofdates = table_of_dates(2006, 1, 1, 'D')
    tableofdates.reset_index(inplace=True)
    tableofdates.to_sql('datestable', engine, if_exists='replace', index=False, index_label='transdate')
    sql.execute("CREATE INDEX transdate_index ON money.datestable (transdate);", engine)
def _drop_table(table_name, schema, engine, print_query=False):
    """Drops a SQL table."""
    if schema is None:
        drop_str = f'DROP TABLE IF EXISTS {table_name};'
    else:
        drop_str = f'DROP TABLE IF EXISTS {schema}.{table_name};'

    if print_query:
        print(drop_str)

    psql.execute(drop_str, engine)
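# A minimal usage sketch (not part of the original code); the table name, schema,
# and connection string are made up for illustration.
from sqlalchemy import create_engine

engine = create_engine('postgresql://localhost:5432/analytics')
_drop_table('daily_sales', 'staging', engine, print_query=True)
# prints: DROP TABLE IF EXISTS staging.daily_sales;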
def importBankAccounts():
    if demo:  ## only used to demo account to add dummy data. Is turned on above.
        df = pd.read_csv('DemoData/BankAccounts.csv')
    else:
        df = pd.read_csv('CSVs/BankAccounts.csv')
    df.to_sql('bankaccounts', engine, if_exists='replace', index=False,
              dtype={'AccountName': sqlalchemy.types.VARCHAR(length=30),
                     'MintAccountName': sqlalchemy.types.VARCHAR(length=30)})
    sql.execute("CREATE INDEX AccountName_index ON money.bankaccounts (AccountName);", engine)
    sql.execute("CREATE INDEX MintAccountName_index ON money.bankaccounts (MintAccountName);", engine)
def insert_and_get_min_user_id(self):
    db_list = sql.read_sql("SELECT MIN(id) as user FROM users;", create_engine(self.conn_string))
    #min_user_id = [m[1]['user'] for m in db_list.iterrows()][0]
    min_user_id = db_list.iloc[0]['user']
    if min_user_id is None:
        min_user_id = 0
    else:
        min_user_id = int(min_user_id)
    user_id = int(min_user_id - 1)
    sql.execute("INSERT INTO users VALUES ({user}, '')".format(user=user_id),
                create_engine(self.conn_string))
    return user_id
def update_data(self, table, fields, values, condition=None):
    """
    :param table: string type, db_name.table_name
    :param fields: string type, like 'value'
    :param values: string type, like: '1000' (for value type) or "'normal'" (for string type)
    :param condition: string type, like 'field_value>50'
    :return:
    """
    if isinstance(values, str):
        values = "'%s'" % values
    update_sql = form_sql(table_name=table, oper_type='update', update_field=fields,
                          update_value=values, where_condition=condition)
    sql.execute(update_sql, self.engine)
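# A minimal usage sketch (not part of the original code); the class name, table,
# column, and condition are hypothetical, and it assumes form_sql builds a standard
# UPDATE statement from the keyword arguments used above.
db = DBOperator()                       # hypothetical class exposing update_data
db.update_data('shop.orders',           # db_name.table_name
               fields='status',
               values='shipped',        # quoted automatically because it is a str
               condition='order_id=42')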
def test_execute(self):
    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_sqlite_schema(frame, "test")
    self.db.execute(create_sql)
    ins = "INSERT INTO test VALUES (?, ?, ?, ?)"

    row = frame.ix[0]
    sql.execute(ins, self.db, params=tuple(row))
    self.db.commit()

    result = sql.read_frame("select * from test", self.db)
    result.index = frame.index[:1]
    tm.assert_frame_equal(result, frame[:1])
def clean_test_data():
    """Function removes all datasets made for testing purposes
    """
    con = mysql_setup()
    query = 'drop table oneday;'
    sql.execute(con=con, sql=query)
    gh_csv = 'data/oneday.csv'
    gh_pick = 'data/oneday.pyd'
    gh_hd5 = 'data/git.h5'
    os.remove(gh_csv)
    os.remove(gh_pick)
    os.remove(gh_hd5)
def importData():
    #Start Time
    start = datetime(2010, 1, 1)
    end = datetime.date(datetime.now())
    data = DataReader(sp500constituents[0], "yahoo", start, end)

    en = enumerate(sp500constituents)
    [i for i, x in en if x == 'WFMI']
    sp500constituents[200:len(sp500constituents)]

    problems = []
    dataImportProblems = []

    for series in sp500constituents[485:len(sp500constituents)]:
        print series
        try:
            data = DataReader(series, "yahoo", start, end)
            data = data.reset_index()
        except:
            print "Can't read {}".format(series)
            dataImportProblems.append(series)
            continue
        con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
        try:
            psql.write_frame(data, series, con)
            con.commit()
        except:
            print "Problems with {}".format(series)
            problems.append(series)
        finally:
            con.close()

    #changing tables to have date formats so RODBC driver recognizes
    #Should check that this is occurring above.
    con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
    for tb in sp500constituents:
        if psql.has_table(tb, con):
            sqltxt = "SELECT * FROM {}".format(tb)
            #print sqltxt
            data = psql.read_frame(sqltxt, con)
            sqlDropTxt = 'DROP TABLE "main"."{}"'.format(tb)
            #print sqlDropTxt
            psql.execute(sqlDropTxt, con)
            con.commit()
            psql.write_frame(data, tb, con)
            con.commit()
    con.close()
def getStockPrices():
    ### returns dataframe of stockprice history
    df = pd.DataFrame()  # creates empty dataframe
    x = stocknames()  # list all stocks owned as they would be entered into API
    # need to update to read from database automatically and generate list
    dffix = False
    for i in range(len(x)):
        try:
            if x[i][0] != 'MoneyMarket':
                df2 = pd.read_csv("http://real-chart.finance.yahoo.com/table.csv?s=%s&d=12&e=31&f=2016&g=d&a=1&b=1&c=2014&ignore=.csv" % x[i][0],
                                  parse_dates=['Date'])[['Date', 'Close']]
                df2['Symbol'] = x[i][0]
                if i == 0:
                    df = df2
                else:
                    df = df.append(df2, ignore_index=True)
        except:
            print x[i][0] + ' caused an error.'
            if not dffix:
                dffix = addoldsymbol(x[i][0])
            else:
                dffix = dffix.append(addoldsymbol(x[i][0]), ignore_index=True)
    # iterate through called stock price list. append list of stock prices to list
    df.rename(columns={'Close': 'Price'}, inplace=True)  # rename column to match database
    df = df[['Date', 'Symbol', 'Price']]
    df = df.append(dffix, ignore_index=True)
    df.columns = ['transdate', 'symbol', 'price']

    # append MoneyMarket at price=1 for all dates since not an actual stockticker
    start_date = datetime.date(2013, 1, 1)
    index = pd.date_range(start=start_date, end=datetime.datetime.today(), freq='d')
    df2 = pd.DataFrame(columns=['transdate'], data=index)
    df2['symbol'] = 'MoneyMarket'
    df2['price'] = 1
    df2 = df2[['transdate', 'symbol', 'price']]
    df = df.append(df2)

    df.to_sql('stocksprices', engine, if_exists='replace',
              dtype={'symbol': sqlalchemy.types.VARCHAR(length=20),
                     'transdate': sqlalchemy.types.DATETIME()})
    sql.execute("CREATE INDEX Stocksprices_transdate_index ON money.stocksprices (transdate);", engine)
    sql.execute("CREATE INDEX Stocksprices_symbol_index ON money.stocksprices (symbol);", engine)
def execute(self, sql, commit=True, debug=False):
    """
    Executes a MySQL query that does not return any results.
    """
    try:
        if debug == True:
            info("Execution de la requete %s" % sql)
        psql.execute(sql, con=self.con, cur=self.cur)
        self.con.commit()
        if debug == True:
            info('Requete %s exécutée.' % (self.cur.statusmessage))
    except MySQLdb.Error, e:
        error(' %s' % e)
def test_execute(self):
    _skip_if_no_MySQLdb()
    frame = tm.makeTimeDataFrame()
    drop_sql = "DROP TABLE IF EXISTS test"
    create_sql = sql.get_schema(frame, 'test', 'mysql')
    cur = self.db.cursor()
    cur.execute(drop_sql)
    cur.execute(create_sql)
    ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"

    row = frame.ix[0]
    sql.execute(ins, self.db, params=tuple(row))
    self.db.commit()

    result = sql.read_frame("select * from test", self.db)
    result.index = frame.index[:1]
    tm.assert_frame_equal(result, frame[:1])
def send_price(self, isin, bid_price, ask_price, bid_size, ask_size):
    bbrgSec6id = isin
    bbrgTrana = 'B'
    bbrgVala = bid_price
    bbrgTranb = 'Z'
    bbrgValb = str(bid_size)
    bbrgTranc = 'A'
    bbrgValc = ask_price
    bbrgTrand = 'Z'
    bbrgVald = str(ask_size)
    now = datetime.datetime.now()
    bbrgDate = now.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    bbrgTime = now.strftime('%H:%M:%S')
    bbrgSec6id = isin
    # Note: bbrgStatus is not defined in this function; it is expected to come
    # from the enclosing scope (e.g. a module-level constant).
    sqlfields = ("bbrgTrana='{}', bbrgVala='{}', bbrgTranb='{}', bbrgValb='{}',"
                 "bbrgTranc='{}', bbrgValc='{}', bbrgTrand='{}', bbrgVald='{}', "
                 "bbrgDate='{}', bbrgTime='{}', bbrgStatus='{}', bbrgSend6='Y' "
                 "WHERE bbrgSec6id='{}'").format(
        bbrgTrana, bbrgVala, bbrgTranb, bbrgValb, bbrgTranc, bbrgValc,
        bbrgTrand, bbrgVald, bbrgDate, bbrgTime, bbrgStatus, bbrgSec6id)
    #print "UPDATE tblQuote SET " + sqlfields
    sql.execute("UPDATE tblQuote SET " + sqlfields, self.engine)
    pass
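# A minimal sketch (not part of the original code) of the same update written as a
# parameterized query, so the price fields are not interpolated into the SQL string
# by hand. The %s placeholder style and the columns updated here are assumptions
# about the driver behind self.engine and the tblQuote layout used above.
def send_price_parameterized(self, isin, bid_price, ask_price, bid_size, ask_size):
    now = datetime.datetime.now()
    sql.execute(
        "UPDATE tblQuote SET bbrgVala=%s, bbrgValb=%s, bbrgValc=%s, bbrgVald=%s, "
        "bbrgDate=%s, bbrgTime=%s, bbrgSend6='Y' WHERE bbrgSec6id=%s",
        self.engine,
        params=[(bid_price, str(bid_size), ask_price, str(ask_size),
                 now.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
                 now.strftime('%H:%M:%S'), isin)])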
def execute(self, sql, notices=False):
    """
    Executes query in PostgreSQL. Can run multiple queries at a time.
    Transactions work as everything will be rolled back if error.

    :param sql: The query
    :param notices: Print PostgreSQL notices if there are.
    """
    con = pg.connect(self.conn_string)
    psql.execute(sql, con)
    con.commit()

    # If there are notices, print them (before closing the connection)
    if len(con.notices) > 0 and notices is True:
        for notice in con.notices:
            print notice.replace('\n', '')

    con.close()
def test_execute_closed_connection(self):
    create_sql = """
    CREATE TABLE test
    (
    a TEXT,
    b TEXT,
    c REAL,
    PRIMARY KEY (a, b)
    );
    """
    self.db.execute(create_sql)

    sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db)
    self.db.close()
    try:
        sys.stdout = StringIO()
        self.assertRaises(Exception, sql.tquery, "select * from test",
                          con=self.db)
    finally:
        sys.stdout = sys.__stdout__
def test_execute_fail(self):
    create_sql = """
    CREATE TABLE test
    (
    a TEXT,
    b TEXT,
    c REAL,
    PRIMARY KEY (a, b)
    );
    """
    self.db.execute(create_sql)

    sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db)
    sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db)

    try:
        sys.stdout = StringIO()
        self.assertRaises(Exception, sql.execute,
                          'INSERT INTO test VALUES("foo", "bar", 7)',
                          self.db)
    finally:
        sys.stdout = sys.__stdout__
def maintenance(self, sql):
    """
    Runs maintenance queries without worrying about transactions.
    (No transaction is used for this type of query.)
    """
    try:
        if self.debug == True:
            self.info("Maintenance - %s" % sql)
        self.old_isolation_level = self.con.isolation_level
        self.con.set_isolation_level(0)
        psql.execute(sql, con=self.con, cur=self.cur)
        self.con.set_isolation_level(self.old_isolation_level)
        if self.debug == True:
            self.info("... Requete %s exécutée." % (self.cur.statusmessage))
    except psycopg2.DatabaseError, e:
        self.error("%s" % e)
def _execute_sql(self, q_in, env=None):
    "execute sql but intercept log"
    if self.do_log:
        self.log.append(q_in)

    env_final = env
    if isinstance(env, (list, tuple)) and len(env) > 0 and isinstance(env[-1], dict):
        env_final = env[:-1]  # remove last dict(), if parameters list
    if self.engine == "mysql" and isinstance(env, dict):
        # we must clean out what is not used
        env_final = {k: v for k, v in env_final.items() if "%(" + k + ")s" in q_in}

    return execute(q_in, self.conn, params=env_final)
def executeQuery(self, query):
    """ Execute query """
    self.__reconnect_if_closed__()
    conn_from_pool = self.pool.getconn()
    res = psql.execute(query, conn_from_pool)
    result = res.fetchall()
    res.close()
    self.pool.putconn(conn_from_pool)
    return result
def execute(self, sql, commit=True, accents=False):
    """
    Executes a PostgreSQL query that does not return any results.
    """
    try:
        if accents is True:
            self.info("... Suppression des accents")
            sql = self.rm_accents(sql)
        sql = self.format_null_values(sql)
        if self.debug == True:
            self.info("Execution - %s" % sql)
        psql.execute(u"%s" % sql, con=self.con, cur=self.cur)
        self.con.commit()
        if self.debug == True:
            self.info("... Requete %s exécutée." % (self.cur.statusmessage))
    except psycopg2.DatabaseError, e:
        self.error("%s" % e)