def get_data(): database = DB() q = "SELECT first_name, last_name, rguru_id FROM player_ids WHERE rguru_id IS NOT NULL" ids = database.query(q) results = [] for (fname, lname, id_) in ids: q = "SELECT dk_pts FROM rguru_hitters WHERE id=%s" scores = [ float(x[0]) for x in database.query(q, (str(id_), )) if x[0] is not None ] # dont count if not enough sample size. if len(scores) < 30: continue mean = np.mean(scores) # only look at players w/ avg score > 4 if mean < 4: continue stdev = np.std(scores) results.append((fname, lname, id_, round(mean, 2), round(stdev, 2), round((mean / stdev), 2))) return sorted(results, key=lambda x: x[5], reverse=True)
def trade_dates(ticker, min_date=None, max_date=None, as_dtime=True): """ Get a list of trade days for provided ticker """ db = DB() query = 'select distinct(date(`dtime`)) as `d` from `quotes` where `ticker` = "%s" ' % ticker if min_date is not None or max_date is not None: if min_date is not None and not isinstance(min_date, datetime.datetime): min_date = datetime.datetime.combine(min_date, datetime.time(0, 0)) if max_date is not None and not isinstance(max_date, datetime.datetime): max_date = datetime.datetime.combine(max_date, datetime.time(23, 59)) if min_date is not None and max_date is not None: query += 'and `dtime` between "%s" and "%s" ' % (min_date, max_date) elif min_date is not None: query += 'and `dtime` >= "%s" ' % min_date else: query += 'and `dtime` <= "%s"' % max_date query += 'order by `d` asc' db.cursor.execute(query) midnight = datetime.time(0, 0) for date in db.cursor: if as_dtime: yield datetime.datetime.combine(date[0], midnight) else: yield date[0] db.close() pass
def query_data(self): """Fetches all records for player id""" db = DB() q = "SELECT bat_order, AB, BB, HBP FROM rguru_hitters where id=%s" res = db.query(q, (self.id_,)) db.finish() return res
def get_data(): database = DB() q = "SELECT first_name, last_name, rguru_id FROM player_ids WHERE rguru_id IS NOT NULL" ids = database.query(q) results = [] for (fname, lname, id_) in ids: q = "SELECT dk_pts FROM rguru_hitters WHERE id=%s" scores = [float(x[0]) for x in database.query(q, (str(id_),)) if x[0] is not None] # dont count if not enough sample size. if len(scores) < 30: continue mean = np.mean(scores) # only look at players w/ avg score > 4 if mean < 4: continue stdev = np.std(scores) results.append(( fname, lname, id_, round(mean, 2), round(stdev, 2), round((mean/stdev), 2) )) return sorted(results, key=lambda x: x[5], reverse=True)
def execute(site, action): """ Execute the action for a site Steps: 1) download 2) convert 3) test 4) create_db 5) download_images 6) delete :param site: the site :param action: the action """ # check actions if not Sites.valid_action(action): print ERROR_ACTION_INVALID return if not Sites.is_valid_site(site): print ERROR_SITE return if site == "blueapron": from websites.blueapron import BlueApron __classmap__["blueapron"] = BlueApron if action == ACTION_DOWNLOAD: from lib.DownloadSite import DownloadSite download = DownloadSite(site, DEBUG) download.download() elif action == ACTION_CONVERT: from lib.Convert import Convert convert = Convert(site, DEBUG) convert.convert_html() elif action == ACTION_TEST: from lib.Testing import Testing testing = Testing(site, DEBUG) testing.test_json() elif action == ACTION_DOWNLOAD_IMAGE: from lib.DownloadSite import DownloadSite download = DownloadSite(site, DEBUG) download.download_images() elif action == ACTION_CREATE_DB: from lib.DB import DB database = DB(site, DEBUG) database.create() elif action == ACTION_DELETE: from lib.Cache import Cache cache = Cache(site, DEBUG) cache.delete_cache()
def fetch_data(site, pos): db = DB() q = "SELECT {0}_pts, {0}_salary FROM rguru_{1}".format(site, pos) points = [] salaries = [] for pts, sal in db.query(q): if pts is None or pts < 0 or sal is None or sal == 0: continue points.append(float(pts)) salaries.append(float(sal)/1000) return salaries, points
class GetHist: def __init__(self, player_id, site='dk'): self.db = DB() self.id_ = player_id self.points = self.get_points(player_id, site) def get_mean(self): return np.mean(self.points) def get_name(self): return get_player.name_from_id(self.id_) def get_stdev(self): return np.std(self.points) def plot_hist(self, bins=20): self.fig = plt.figure() self.ax = self.fig.add_subplot(111) self.ax.hist(self.points, bins=bins, normed=True) self.ax.axvline(self.get_mean(), color='red', linestyle='--', linewidth=1.5, label='Avg. Points: {0:.2f}\nSt. Dev. Points: {1:.2f}'.format( self.get_mean(), self.get_stdev())) self.ax.legend(loc='upper right') self.ax.set_title(self.get_name()) self.ax.set_ylabel('%') self.ax.set_xlabel('Points Scored') def get_points(self, id_, site): q = "SELECT {}_pts FROM rguru_hitters WHERE id={}".format(site, id_) return [float(x[0]) for x in self.db.query(q)]
def __init__(self, env): """ Parameters ---------- env : str the environment that the current instance is running """ print("[ENDPOINTS] Initializing...") # initialize libraries self._env = env self._db = DB(self._env, self._workers) self._logger = Logger(self._db, self._env) self._crypto = Crypto() # initialize Flask self._app = Flask(__name__) self._app.json_encoder = CustomJSONEncoder self._api = Api(self._app) self._app.before_request(self.detectAuthorization) self._app.after_request(self.finishRequest) for url in self._endpoints: self.addResource(self._endpoints[url], url) print("[ENDPOINTS] Done.")
sys.path.append("/var/www/html/") from bson import json_util from lib.DB import DB from phpserialize import serialize, unserialize from urlparse import urlparse DB = DB().connect() sql = """ SELECT cn_id, d_id, 1 as num, fbc_company_name, fbc_domain FROM domains JOIN freebase_companies ON d_domain = fbc_domain JOIN company_names ON fbc_company_name = cn_company_name; """ try: DB.execute(sql, ()) data = DB.fetchall() except: print 'Sorry, something went wrong!' insert_sql = "INSERT INTO company_domain_rel (cdr_cn_id, cdr_d_id, cdr_source, cdr_company_name, cdr_domain) VALUES(%s, %s, %s, %s, %s);" if data: for d in data: if d: try: DB.execute(insert_sql, (d['cn_id'],d['d_id'],d['num'],d['fbc_company_name'],d['fbc_domain'],)) print DB.mogrify(insert_sql, (d['cn_id'],d['d_id'],d['num'],d['fbc_company_name'],d['fbc_domain'],)) print DB.statusmessage except:
def get_team_abs(self, date, team): db = DB() q = "SELECT name, AB, BB, HBP FROM rguru_hitters WHERE date=%s and team=%s" res = db.query(q, (date, team)) db.finish() return res
def get_dates(self): db = DB() q = "SELECT date FROM rguru_hitters WHERE team = %s" res = db.query(q, (self.team,)) db.finish() return set([x[0] for x in res])
def processData(mssqlConfig, mysqlConfig, logFile, tables): logging.basicConfig(filename=logFile, filemode='a', level=logging.INFO, format='%(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') mysqlServer = mysqlConfig['host'] mysqlPort = mysqlConfig['port'] mysqlUsername = mysqlConfig['username'] mysqlPassword = mysqlConfig['password'] mysqlDB = mysqlConfig['db'] mysqlHandle = DB.myHandle(mysqlServer, mysqlUsername, mysqlPassword, mysqlDB, mysqlPort) print 'mysqlServer:' + mysqlServer + ' mysqlPort:' + str( mysqlPort) + ' mysqlUserName:'******'mysqlPassword:'******' mysqlDB:' + mysqlDB mssqlDriver = mssqlConfig['driver'] mssqlServer = mssqlConfig['host'] mssqlUsername = mssqlConfig['username'] mssqlPassword = mssqlConfig['password'] mssqlDB = mssqlConfig['db'] mssqlPort = mssqlConfig['port'] mssqlDSN = 'Driver={};Server={};Database={};Uid={};Pwd={};Port={};TDS_Version=8.0;'.format( mssqlDriver, mssqlServer, mssqlDB, mssqlUsername, mssqlPassword, mssqlPort) print 'mssqlDriver:' + mssqlDriver + ' mssqlServer:' + mssqlServer + ' mssqlUserName:'******'mssqlPassword:'******' mssqlDB:' + mssqlDB + ' mssqlPort:' + str( mssqlPort) mssqlHandle = DB.msHandle(mssqlDSN) mssqlUtil = MssqlUtil() mssqlUtil.setMssqlHandle(mssqlHandle.getCursor()) dataTypes = Utils.getDataTypes() process = DataProcess() process.setDBHandle(mysqlHandle, mssqlHandle) process.setLogging(logging) for table in tables: msTable = table myTable = '{}_{}'.format(mssqlServer, table) pk = mssqlUtil.getPrimaryKey(table) # Auto create table createSql = mssqlUtil.genCreateTableSql(table, myTable, dataTypes) mysqlHandle.getCursor().execute(createSql) # Update/Insert records in destination DB that do not match source DB. logging.info("Preparing to insert/update records.") process.refreshData(myTable, msTable, pk) logging.info("Insert/update records complete.") # Delete records from destination DB that no longer exist in source DB. logging.info("Preparing to delete records.") process.deleteData(myTable, msTable, pk) logging.info("Delete records complete.") mssqlHandle.disconnect() mysqlHandle.disconnect()
def run(): conn = DB().getConn() while conn: timeNow = time.time() if ((round(timeNow) % 15) == 0): save_data(conn, round(timeNow))
@staticmethod def insert_many(): sql = ''' INSERT INTO stock_plate(plate_code,first_stock_code, plate_name,stock_cnt,price_avg,wave_cnt,wave_range,vol,amount, first_stock_name, first_stock_price, first_stock_wave_cnt, first_stock_wave_range,remark,TIME) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ''' return sql @staticmethod def get_all_kline_info(now): sql = ''' select * from stock_kline where tr_date='%s' and date_type=0 and ex_type=1 ''' % now return sql @staticmethod def insert_stock_xg_many(): sql = ''' insert into stock_xg(code, tr_date, date_type, ex_type, xg, remark, time) values(%s, %s, %s, %s, %s, %s, %s) ''' return sql if __name__ == "__main__": db = DB() print db.excutemany(SQLMgr.insert_stock_xg_many(),[('11',1,1,1,'123','111','2016-11-11'),]) db.close()
def run(): conn = DB().getConn() while conn: save_data(conn)
""" Calculates the at bats of a player vs pitch hand. Tries to determine which players are used primarily as platoon players. Note: This script will probably not be functional for the time being because I use a different table in the DB to obtain the pitch hand. """ from lib.DB import DB import lib.get_player as get_player from collections import defaultdict import tabulate db = DB() def get_batter_ids(sample_lim=100): """ Gets batter ids for batters have at least the amount of given at bats :sample_lim: int, sample size constraint to include batters in the sample. :return: A list of batter ids """ q = "SELECT id FROM rguru_hitters" res = [x[0] for x in db.query(q)] selected_batters = [] for id_ in set(res): if res.count(id_) >= sample_lim: selected_batters.append(id_)
def run(): conn = DB().getConn() while conn: retry_data(conn) time.sleep(conf['retryInterval'])
class API: """ The Endpoints class Initializes the API portion of Bifrost. Also, takes care of authorizations. Attributes ---------- _endpoints : dict the endpoint with the associated classes _version : str current version that the API runs on _logger : Logger the logger object for keeping track of traffic _db : DB the DB object for DB interfaces """ _endpoints = conf._endpoints _protected = conf._protected _workers = conf._workers _version = "v0" _logger = None _db = None _crypto = None _env = "" # -------------------------------------------------------------------------- def __init__(self, env): """ Parameters ---------- env : str the environment that the current instance is running """ print("[ENDPOINTS] Initializing...") # initialize libraries self._env = env self._db = DB(self._env, self._workers) self._logger = Logger(self._db, self._env) self._crypto = Crypto() # initialize Flask self._app = Flask(__name__) self._app.json_encoder = CustomJSONEncoder self._api = Api(self._app) self._app.before_request(self.detectAuthorization) self._app.after_request(self.finishRequest) for url in self._endpoints: self.addResource(self._endpoints[url], url) print("[ENDPOINTS] Done.") # -------------------------------------------------------------------------- def getApp(self): """ Return Flask app AWS requires a Elastic Beanstalk app to be an executable app. As for now this works. Returns ------- Flask the flask application for AWS """ return self._app # -------------------------------------------------------------------------- def logRequests(self, rtype, request): """ Prepare log messages Prepare the messages that we want and use Logger to save them to disk, or send a notification. Parameters ---------- rtype : int response type request : Request request object that was generated by Flask """ status = "NORMAL" if rtype == 404 or rtype == 500: status = "CRITICAL" elif rtype == 401: status = "NOAUTH" self._logger.log( "endpoints", json.dumps({ "rtype": str(rtype), "path": request.path, "data": request.data.decode("utf8"), "args": request.args.to_dict(), "method": request.method, "remote_addr": request.remote_addr, "headers": request.headers.to_list() }), status=status ) # -------------------------------------------------------------------------- def sendAuthorizationError(self, message, token): """ Create the authorization error message Generates a error message that is used multiple times in the code. Parameters ---------- message : str message to be sent token : str the token that was used in the request Returns ------- str the returning JSON response as a string int HTTP response code """ return (json.dumps({ "error":message }), 401) # -------------------------------------------------------------------------- def isValid(self, session, patient_id): """ Check if token is valid Uses DB to check if the given token is existing and acive. The `active` flag in the DB can hence be used to quickly deactivate a token. Parameters ---------- session : str the token that was used in the request patient_id : str the patient ID that was sent with the request Returns ------- bool IF valid => True, ELSE False str the patient_id of the user associated with the session dict the full session dict containing all the information loaded from DB """ # check if token set; try: if session == "" or patient_id == "": return False, "", {} session = self._db.getSession(session) if session["patient_id"] != patient_id: return False, "", {} except: return False, "", {} return True, session["patient_id"], session # -------------------------------------------------------------------------- def detectAuthorization(self): """ Check if authorization is valid Uses the Flask request to check the header. The `Bearer` header must be present and name a valid session_id. Specifically, the function looks for the `Authorization: Bearer [SESSION]` header (note the exact format). Finally, the function adds `patient_id` and `session` to the request object, to make this information available to the system. """ request_path = request.path[len(self._version)+1:] header = request.headers.get("Authorization") if request_path in self._protected and request.method in self._protected[request_path]: header = request.headers.get("Authorization") if not header: return self.sendAuthorizationError("Invalid header. Request registered.", "") # bearer or token not set; outs = header.split() if len(outs) != 2: return self.sendAuthorizationError("Invalid authentication. Request registered.", "") bearer, session = outs auth, patient_id, obj = self.isValid(session, request.headers.get("Patient")) if bearer != "Bearer" or not auth: return self.sendAuthorizationError("Invalid authentication. Request registered.", session) request.patient_id = patient_id request.session = session request.obj = obj # -------------------------------------------------------------------------- def finishRequest(self, response): """ Hook for after response has been prepared This function logs the response. Parameters ---------- response : Response Response object for Flask Returns ------- response : Response Response object for Flask """ response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "Authorization,Patient" self.logRequests(response.status_code, request) return response # -------------------------------------------------------------------------- def addResource(self, obj, url): """ Add resources to flask_restful Injects the API with the endpoints that are given in the `_endpoints` attribute. Parameters ---------- obj : flask_restful.Resource class to inject url : str Flask formatted endpoint """ print("[ADDED ROUTE]", "/"+self._version+url) self._api.add_resource( obj, "/"+self._version+url, resource_class_kwargs={ "logger":self._logger, "db":self._db, "crypto": self._crypto, "workers": self._workers, "request_string": "/"+self._version+url } )
try: driver.get(target_url) html = driver.page_source soup = soup = BeautifulSoup(html, 'lxml') tbody = soup.find_all('tbody') tbody = tbody[1] tr_list = tbody.find_all('tr') row_data = [] for tr in tr_list: tmp_row = [] td_list = tr.find_all("td") a = td_list[0].a.attrs tmp_row.append(a['href'].split('#')[1]) tmp_row.append(td_list[7].a.attrs['href'].split('/')[-2]) for td in td_list: tmp_row.append(td.get_text()) tmp_row[2] = tmp_row[2].encode('utf-8') tmp_row[9] = tmp_row[9].encode('utf-8') tmp_row[9] = tmp_row[9].split(" ")[0] tmp_row.append('test') tmp_row.append(datetime.datetime.now()) row_data.append(tmp_row) print row_data db = DB() print db.excutemany(SQLMgr.insert_many(), row_data) db.close() except Exception as e: print e finally: driver.quit()
def get(columns, ticker, dtime_from=None, dtime_to=None, date=None, market_hours=True, order='asc'): """ Get minutely quote values for the given time range or date :param columns: Iterable set of columns that should be obtained :param ticker: Ticker name :param dtime_from: When selecting by time range: Time range start :param dtime_to: When selecting by time range: Time range end :param date: When selecting by date: Date :param market_hours: Should the data be obtained only within market hours? :param order: Records datetime order: 'asc' - old first, 'desc' - new first :return: Iterable generator with tuples for dtime + specified columns """ if Data._db is None: Data._db = DB().db Data._cursor = Data._db.cursor() if isinstance(columns, str): # Specified indicators group: need to get all of them columns = Config.get('dbstructure.' + columns).keys() # MySQL tables that will be used and inner columns tables = [] query_columns = [] for column in columns: for table, table_indicators in Config.get('dbstructure').items(): if column in table_indicators: if len(query_columns) == 0: query_columns.append('`%s`.`dtime`' % table) query_columns.append('`%s`.`%s`' % (table, column)) if table not in tables: tables.append(table) break if len(tables) == 0: return query = 'select ' + ', '.join(query_columns) + ' from `' + '`, `'.join( tables) + '` ' query += 'where `%s`.`ticker` = "%s" ' % (tables[0], ticker) if date is not None: if isinstance(date, datetime.date): date = datetime.datetime.combine(date, datetime.time(0, 0)) elif isinstance(date, str): date = datetime.datetime.strptime(date, '%Y-%m-%d') if market_hours: dtime_from = tradetime.daystart(date) dtime_to = tradetime.dayend(date) else: dtime_from = date.replace(hour=0, minute=0) dtime_to = date.replace(hour=23, minute=59) query += 'and `%s`.`dtime` between "%s" and "%s" ' % ( tables[0], dtime_from, dtime_to) if market_hours and date is None: query += 'and time(`%s`.`dtime`) between "%s" and "%s" ' % ( tables[0], tradetime.start, tradetime.end) # Joining tables for table in tables: if table == tables[0]: continue query += 'and `%s`.`ticker` = `%s`.`ticker` ' % (table, tables[0]) query += 'and `%s`.`dtime` = `%s`.`dtime` ' % (table, tables[0]) query += 'order by `dtime` ' + order Data._cursor.execute(query) for entry in Data._cursor: yield entry Data._db.commit()
def __init__(self, player_id, site='dk'): self.db = DB() self.id_ = player_id self.points = self.get_points(player_id, site)
data = result_queue.get(20) #60s no data come , seem write done if count == 10: count = 0 logger.info("insert result: %s", db.excutemany(SQLMgr.insert_stock_xg_many(), write_list)) write_list = [] count += 1 write_list.append(data) except Exception as e: logger.info("db break %s", e) break #result_queue.task_done() if __name__ == "__main__": Log("test.log") try: db = DB() all_data = db.query(SQLMgr.get_all_kline_info(Tools.get_today_str())) if all_data: for row in all_data: task_queue.put(row) print all_data worker_thread_num = 4 worker_thread_list = [] for i in range(worker_thread_num): t = threading.Thread(target=worker, args=(task_queue, result_queue)) worker_thread_list.append(t) database_thread = threading.Thread(target=db_writer, args=(result_queue, db)) database_thread.setDaemon(True) database_thread.start() #