def downloadForecast():
    # download
    start_date = datetime.date.today()
    end_date = start_date + datetime.timedelta(days=FORECAST_RANGE)
    f = '00'
    forecast_types = {
        FORECAST_FLX_FOLDER: 'http://nomads.ncep.noaa.gov/cgi-bin/filter_cfs_flx.pl?file=flxf{f_year}{f_month:02}{f_day:02}{time}.01.{year}{month:02}{day:02}{f}.grb2&lev_2_m_above_ground=on&lev_surface=on&var_PRATE=on&var_TMAX=on&var_TMIN=on&var_TMP=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%%2Fcfs.{year}{month:02}{day:02}%%2F{f}%%2F6hrly_grib_01',
        FORECAST_PGB_FOLDER: 'http://nomads.ncep.noaa.gov/cgi-bin/filter_cfs_pgb.pl?file=pgbf{f_year}{f_month:02}{f_day:02}{time}.01.{year}{month:02}{day:02}{f}.grb2&lev_2_m_above_ground=on&lev_surface=on&var_APCP=on&var_RH=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%%2Fcfs.{year}{month:02}{day:02}%%2F{f}%%2F6hrly_grib_01'
    }
    for key in forecast_types.keys():
        for a_date in daterange(start_date, end_date):
            for a_time in ['00', '06', '12', '18']:
                url = forecast_types[key].format(year=start_date.year, month=start_date.month,
                                                 day=start_date.day, f_year=a_date.year,
                                                 f_month=a_date.month, f_day=a_date.day,
                                                 time=a_time, f=f)
                folder = key
                filename = getFilenameForGDAS(a_date, a_time, f=f)
                open(folder + '/' + filename, 'wb').write(urllib.request.urlopen(url).read())
                time.sleep(SLEEP)
def dailymessagesperuser(channelid: str, userid: str) -> typing.Tuple[str, str, dict]:
    """Returns discord channel name, user name and a dictionary including the number of
    messages every day from the start of the log to the end"""
    data = decompressdata(channelid)
    username = None
    daily = {}

    # Init daily message count dictionary with all days between first and last message
    start_dt = datetime.date.fromisoformat(data["messages"][0]["timestamp"].split("T")[0])
    end_dt = datetime.date.fromisoformat(data["messages"][-1]["timestamp"].split("T")[0])
    for dt in daterange(start_dt, end_dt):
        daily[dt.strftime("%Y-%m-%d")] = 0

    for i in range(len(data["messages"])):
        user = data["messages"][i]["author"]["id"]
        if user != userid:
            continue
        date = data["messages"][i]["timestamp"].split("T")[0]
        username = f'{data["messages"][i]["author"]["name"]}#{data["messages"][i]["author"]["discriminator"]}'
        daily[date] += 1

    # Remove empty days from before the user's first message
    for dt in daterange(start_dt, end_dt):
        if not daily[dt.strftime("%Y-%m-%d")]:
            del daily[dt.strftime("%Y-%m-%d")]
        else:
            break

    return (data['channel']['name'], username, daily)
def scrape(origin, destination, start_date, end_date):
    # Creates a file with the cheapest outgoing and return flights for a given
    # date range.
    items = int((end_date - start_date).days) - 1
    date_list = daterange(start_date, end_date)
    data = {}
    for index, (depart_date, return_date) in enumerate(pairwise(date_list)):
        try:
            # params is defined elsewhere in the original script
            depart_list, return_list = search_flight(destination, depart_date,
                                                      return_date, params)
        except Exception:
            log.error(f"Some error occurred, retrying item {index+1}/{items}")
            continue  # skip this date pair instead of writing stale data

        # Cheapest fares for this pair (assumes search_flight returns lists of prices)
        min_depart = min(depart_list)
        min_return = min(return_list)

        data = open_file(f'{destination}.json')
        data[str(depart_date)]['outgoing'] = min_depart
        data[str(return_date)]['return'] = min_return
        with open(f'{destination}.json', 'w') as f:
            json.dump(data, f, sort_keys=True, indent=8, separators=(',', ': '))
        log.info(f"Search {index+1}/{items} complete: {depart_date} to "
                 f"{return_date}. ({min_depart}MYR, {min_return}MYR)")
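# The flight scraper above walks consecutive (depart_date, return_date) pairs with a
# pairwise() helper whose body is not shown here. A minimal sketch, assuming it behaves
# like the classic itertools recipe (equivalent to itertools.pairwise in Python 3.10+):
from itertools import tee

def pairwise(iterable):
    """s -> (s0, s1), (s1, s2), (s2, s3), ..."""
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)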
def test_daterange():
    start_date = date(2018, 5, 30)
    end_date = date(2018, 6, 1)
    interval = daterange(start_date, end_date)
    assert date(2018, 5, 30) == next(interval)
    assert date(2018, 5, 31) == next(interval)
    assert date(2018, 6, 1) == next(interval)
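# The test above pins daterange() down as inclusive of both endpoints. A minimal
# generator consistent with that expectation (note that other snippets in this
# collection may rely on an end-exclusive variant from their own utils module):
import datetime

def daterange(start_date, end_date):
    """Yield every date from start_date through end_date, inclusive of both ends."""
    for offset in range((end_date - start_date).days + 1):
        yield start_date + datetime.timedelta(days=offset)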
def scrape_games():
    parser = utils.incremental_date_range_cmd_line_parser()
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')
    args = parser.parse_args()
    last_month = ''
    for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                    datetime.date.today()):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            if DEBUG:
                print 'skipping', str_date, 'because not in cmd line arg daterange'
            continue
        mon = time.strftime("%b%y", cur_date.timetuple())
        if mon != last_month:
            print
            print mon, cur_date.day * " ",
            sys.stdout.flush()
            last_month = mon
        ret = scrape_date(str_date, cur_date, passive=args.passive)
        if ret == DOWNLOADED:
            print 'o',
        elif ret == REPACKAGED:
            print 'O',
        elif ret == ERROR:
            print '!',
        elif ret == MISSING:
            print '_',
        else:
            print '.',
        sys.stdout.flush()
    print
    os.chdir('../..')
def downloadDataFromIMERG(start_date, end_date, folder):
    config_parser = ConfigParser()
    config_parser.read('resources/passwords.cfg')
    passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passman.add_password(None, 'https://urs.earthdata.nasa.gov',
                         config_parser.get('IMERG', 'username'),
                         config_parser.get('IMERG', 'password'))
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(passman),
        urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar()))
    urllib.request.install_opener(opener)
    for a_date in daterange(start_date, end_date):
        filename = getFilenameForIMERG(a_date)
        if path.isfile(folder + '/' + filename):
            continue  # TODO: Also check filesize
        url = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDL.{version}/{year}/{month:02}/{filename}'.format(
            year=a_date.year, month=a_date.month, filename=filename,
            version=getIMERGVersion(a_date))
        if getIMERGVersion(a_date) == '05':
            url = url.replace('data', 'opendap') + '.nc4?precipitationCal[1040:1280][339:709],precipitationCal_cnt[1040:1280][339:709],lon[1040:1280],lat[339:709]'
        request = urllib.request.Request(url)
        response = urllib.request.urlopen(request)
        open(folder + '/' + filename, 'wb').write(response.read())
        time.sleep(SLEEP)
def dailymessages(channelid: str) -> typing.Tuple[str, dict, dict]:
    """Returns discord channel name, a list of all user IDs who have sent a message and
    a dictionary including the number of messages every day from the start of the log
    to the end"""
    data = decompressdata(channelid)
    users = []
    daily = {}

    # Init daily message count dictionary with all days between first and last message
    start_dt = datetime.date.fromisoformat(data["messages"][0]["timestamp"].split("T")[0])
    end_dt = datetime.date.fromisoformat(data["messages"][-1]["timestamp"].split("T")[0])
    for dt in daterange(start_dt, end_dt):
        daily[dt.strftime("%Y-%m-%d")] = 0

    for i in range(len(data["messages"])):
        date = data["messages"][i]["timestamp"].split("T")[0]
        user = data["messages"][i]["author"]["id"]
        if user not in users:
            users.append(user)
        daily[date] += 1

    return (data['channel']['name'], users, daily)
def download(years, datasource, dest, skip_existing, datatype, filefmt):
    """
    Create date range and downloads file for each date.

    Args:
        years (iterable): years for which to download data
        datasource (str): source ('merra' or 'merra2')
        dest (str): path to destination directory
        skip_existing (bool): skip download if target exists
        datatype (str): choose 'wind' or 'solar' data for presets
        filefmt (str): file format to download
    """
    try:
        options = PRESETS[datasource]
    except KeyError as e:
        raise ArgumentError("Unknown datasource '{}'".format(datasource))

    if datatype not in options['datatypes']:
        raise ArgumentError("Unknown datatype '{}' for source '{}'".format(datatype, datasource))
    if filefmt not in options['fileformats']:
        raise ArgumentError("Unknown file format '{}' for source '{}'".format(filefmt, datasource))

    dates = chain(*[utils.daterange(start_date=datetime.date(year, 1, 1),
                                    end_date=datetime.date(year + 1, 1, 1))
                    for year in years])

    pool = utils.ThreadPool(4)

    def dl_task(date):
        download_date(date, dest, skip_existing, settings=options,
                      datatype=datatype, filefmt=filefmt)

    for date in dates:
        pool.add_task(dl_task, date)

    pool.wait_completion()
def history(self, start, end):
    history = []
    for date in daterange(start, end):
        day_balance = {}
        day_balance['day'] = date.strftime('%Y-%m-%d')
        day_balance['balance'] = self.balance_in_minutes(date.strftime('%Y-%m-%d'))
        history.append(day_balance)
    return history
def get_blank_calendar_object(self, start_and_end):
    start = utils.get_date_from_gtfs_date(start_and_end['start_date'])
    end = utils.get_date_from_gtfs_date(start_and_end['end_date'])
    calendarObject = {}
    for d in utils.daterange(start, end):
        d = utils.get_gtfs_date_from_date(d)
        calendarObject[d] = None
    return calendarObject
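# get_blank_calendar_object() leans on two GTFS date helpers whose bodies are not shown.
# A sketch of plausible implementations, assuming the usual GTFS convention that
# calendar dates are 'YYYYMMDD' strings:
import datetime

def get_date_from_gtfs_date(gtfs_date):
    """Parse a GTFS 'YYYYMMDD' string into a datetime.date."""
    return datetime.datetime.strptime(gtfs_date, "%Y%m%d").date()

def get_gtfs_date_from_date(d):
    """Format a datetime.date back into a GTFS 'YYYYMMDD' string."""
    return d.strftime("%Y%m%d")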
def get_chart(data):
    if data["type"] == "subjectinplaces":
        if "all_locations" in data:
            all_locations = True
        subject = data["subject"].strip()
        if subject == "":
            return "{'error':'Empty subject'}"
        subject_regex = simpleregex.create(subject)
        now = datetime.now().date()
        locations = load_locations()
        return json.dumps(
            get_subject_relevance_in_places(now, subject_regex, locations,
                                            data["ctype"] == "r"))
    # End subjectinplaces
    elif data["type"] == "subjectinplaceshistory":
        subject = data["subject"].strip()
        if subject == "":
            return "{'error':'Empty subject'}"
        start_date = datetime.strptime(data["starts"], "%Y-%m-%d").date()
        end_date = datetime.strptime(data["ends"], "%Y-%m-%d").date()
        sr = simpleregex.create(subject)
        locations = load_locations()
        all_data = {}
        for current in utils.daterange(start_date, end_date + timedelta(days=1)):
            current_str = str(current.strftime("%d-%m-%Y"))
            all_data[current_str] = get_subject_relevance_in_places(
                current, sr, locations, data["ctype"] == "r")
        return json.dumps(all_data)
    # End subjectinplaceshistory
    elif data["type"] == "subjectsinplace":
        date = datetime.strptime(data["date"], "%Y-%m-%d").date()
        history = date != datetime.now().date()
        data = get_trends_in_place(date, data["location"], history=history)
        return json.dumps(data[0]["trends"]) if data != "" else '{"error": "Data not found"}'
    # End subjectsinplace
    return '[]'
def downloadDataFromGDAS(start_date, end_date, folder):
    for a_date in daterange(start_date, end_date):
        for a_time in ['00', '06', '12', '18']:
            for a_forecast in ['00', '03', '06', '09']:  # a forecast time
                url = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_gdas_0p25.pl?file=gdas.t{time}z.pgrb2.0p25.f0{forecast}&lev_2_m_above_ground=on&var_GUST=on&var_RH=on&var_TCDC=on&var_TMAX=on&var_TMIN=on&var_TMP=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%%2Fgdas.{year}{month:02}{day:02}%%2F{time}'.format(
                    time=a_time, forecast=a_forecast,
                    year=a_date.year, month=a_date.month, day=a_date.day)
                filename = getFilenameForGDAS(a_date, a_time, f=a_forecast)
                open(folder + '/' + filename, 'wb').write(urllib.request.urlopen(url).read())
                time.sleep(SLEEP)
def graph(request):
    start = datetime.date(2013, 1, 27)
    to = datetime.timedelta(days=100)
    end = start + to
    if not cache.get('ar'):
        ar = {}
        feeds = Feed.objects.all()
        for feed in feeds:
            user_ar = {}
            r = daterange(start, to=end)
            for a in r:
                count = Post.objects.filter(dt_published__year=a.year,
                                            dt_published__month=a.month,
                                            dt_published__day=a.day,
                                            feed=feed).count()
                if count > 20:
                    class_ = '_4'
                elif count > 15 and count <= 20:
                    class_ = '_3'
                elif count <= 15 and count > 10:
                    class_ = '_2'
                elif count <= 10 and count > 0:
                    class_ = '_1'
                else:
                    class_ = '_0'
                user_ar[a] = class_
            od = collections.OrderedDict(sorted(user_ar.items()))
            ar[feed] = od
        ar = collections.OrderedDict(sorted(ar.items()))
        cache.set('ar', ar, 60 * 60)
    else:
        ar = cache.get('ar')
    ctx = {
        # 'post_list': Post.objects.all(),
        'feed_count': [x.id for x in Feed.objects.all()],
        'table': ar,
        'today': datetime.datetime.today(),
        'show_all': True
    }
    return ctx
def combine_price_and_sentiment(price_data, sentiment_data, date_start, date_end):
    p_and_s = {}
    for company in sentiment_data.keys():
        company_ps = {}
        for date in daterange(date_start, date_end):
            date_ps = np.zeros(7)
            # Currently inefficient, if this becomes bottleneck then fix
            for date_list in price_data[company]:
                if date_list[0] == date:
                    date_ps[:6] = date_list[1:]
            if date in sentiment_data[company]:
                date_ps[6] = sentiment_data[company][date]
            if not (date_ps == np.zeros(7)).all():
                company_ps[date] = date_ps
        p_and_s[company] = company_ps
    return p_and_s
def _get_event_table(dimension, year, quarter=None):
    quarter_dates = ['12-31', '03-31', '06-30', '09-30']
    start_year = year
    if quarter == 0:
        start_year -= 1
    start_dt = datetime.datetime.strptime(
        str(start_year) + '-' + quarter_dates[quarter], "%Y-%m-%d")
    end_dt = datetime.datetime.strptime(
        str(year) + '-' + quarter_dates[(quarter + 1) % 4], "%Y-%m-%d")
    event_table = quandl.get_table('SHARADAR/EVENTS',
                                   date=list(daterange(start_dt, end_dt)),
                                   paginate=True)
    price_table = quandl.get_table('SHARADAR/SF1',
                                   dimension=dimension,
                                   calendardate=[start_dt, end_dt],
                                   paginate=True)
    return event_table, price_table
def download(years, datasource, dest, skip_existing, datatype, filefmt):
    """
    Create date range and downloads file for each date.

    Args:
        years (iterable): years for which to download data
        datasource (str): source ('merra' or 'merra2')
        dest (str): path to destination directory
        skip_existing (bool): skip download if target exists
        datatype (str): choose 'wind' or 'solar' data for presets
        filefmt (str): file format to download
    """
    try:
        options = PRESETS[datasource]
    except KeyError as e:
        raise ArgumentError("Unknown datasource '{}'".format(datasource))

    if datatype not in options['datatypes']:
        raise ArgumentError("Unknown datatype '{}' for source '{}'".format(
            datatype, datasource))
    if filefmt not in options['fileformats']:
        raise ArgumentError("Unknown file format '{}' for source '{}'".format(
            filefmt, datasource))

    dates = chain(*[
        utils.daterange(start_date=datetime.date(year, 1, 1),
                        end_date=datetime.date(year + 1, 1, 1))
        for year in years
    ])

    pool = utils.ThreadPool(4)

    def dl_task(date):
        download_date(date, dest, skip_existing, settings=options,
                      datatype=datatype, filefmt=filefmt)

    for date in dates:
        pool.add_task(dl_task, date)

    pool.wait_completion()
def load_stock(self, start_date, end_date):
    combination_setting = strategy.CombinationSetting()
    strategy_creator = CombinationStrategy(combination_setting)
    codes = []
    for date in utils.daterange(utils.to_datetime(start_date),
                                utils.to_datetime(end_date)):
        codes = list(set(codes + strategy_creator.subject(utils.to_format(date))))
    data = None
    while data is None or len(data) <= self.state_size:
        self.code = numpy.random.choice(codes)
        data = Loader.load(
            self.code,
            utils.to_format(utils.to_datetime(start_date) - relativedelta(months=12)),
            end_date)
        data = utils.add_stats(data)
        data = Loader.filter(data, start_date, end_date)
        print("code: [%s]" % self.code)
    return data
def get(self):
    data = Analytics.parser.parse_args()
    likes = db.session.query(
        LikeModel.date_created,
        db.func.count(LikeModel.date_created)).filter(
            LikeModel.date_created > data['date_from'],
            LikeModel.date_created < data['date_to']).group_by(
                LikeModel.date_created).all()
    likes_dict = {date.date(): likes_count for date, likes_count in likes}
    result_stat = [{
        "date": date.strftime('%Y-%m-%d'),
        "likes": likes_dict.get(date, 0)
    } for date in daterange(
        datetime.datetime.strptime(data['date_from'], "%Y-%m-%d").date(),
        datetime.datetime.strptime(data['date_to'], "%Y-%m-%d").date())]
    return result_stat, 200
def extractForecastData(lat, lon, out_filename):
    output = 'Date,Minimum Temp (C),Mean Temperature (C),Maximum Temp (C),Rain (mm),Relative Humidity %,CloudCover,Mean Wind SpeedKm/h' + '\n'
    today = datetime.date.today()
    for a_date in daterange(today, today + datetime.timedelta(hours=FORECAST_RANGE * 24)):
        FIELDS = ['Relative humidity']
        fields_values = extractDailyDataFromGDAS(lat, lon + 360., a_date,
                                                 FORECAST_PGB_FOLDER, FIELDS,
                                                 typeOfLevel='heightAboveGround',
                                                 f='00')
        mean_rh = (np.min(fields_values[FIELDS[0]]) + np.max(fields_values[FIELDS[0]])) / 2.

        FIELDS = ['Minimum temperature', 'Maximum temperature', 'Precipitation rate']
        fields_values = extractDailyDataFromGDAS(lat, lon + 360., a_date,
                                                 FORECAST_FLX_FOLDER, FIELDS,
                                                 typeOfLevel=['heightAboveGround', 'surface'],
                                                 f='00')
        min_T, max_T = np.min(fields_values[FIELDS[0]]), np.max(fields_values[FIELDS[1]])
        mean_T = (min_T + max_T) / 2.
        precipitation = np.sum(np.array(fields_values[FIELDS[2]]) * 60 * 60 * 6)

        output += a_date.strftime('%Y-%m-%d') + ', ' + ', '.join([
            str(min_T), str(mean_T), str(max_T), str(precipitation), str(mean_rh)
        ]) + ',,' + '\n'
    open(out_filename.replace('.csv', '.forecast.csv'), 'w').write(output)
def scrape_games():
    parser = utils.incremental_date_range_cmd_line_parser()
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')
    args = parser.parse_args()
    last_month = ''

    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    # Goko updates logs in real time; wait a day so the list is finalized.
    for cur_date in utils.daterange(default_startdate, yesterday, reverse=True):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            if DEBUG:
                print 'skipping', str_date, 'because not in cmd line arg daterange'
            continue
        mon = time.strftime("%b%y", cur_date.timetuple())
        if mon != last_month:
            print
            print mon, cur_date.day * " ",
            sys.stdout.flush()
            last_month = mon
        ret = scrape_date(str_date, cur_date, passive=args.passive)
        if ret == DOWNLOADED:
            print 'o',
        elif ret == REPACKAGED:
            print 'O',
        elif ret == ERROR:
            print '!',
        elif ret == MISSING:
            print '_',
        else:
            print '.',
        sys.stdout.flush()
    print
    os.chdir('../..')
def extractHistoricData(lat, lon, start_date, end_date, out_filename):
    output = ''
    if not os.path.isfile(out_filename):
        output = 'Date,Minimum Temp (C),Mean Temperature (C),Maximum Temp (C),Rain (mm),Relative Humidity %,CloudCover,Mean Wind SpeedKm/h' + '\n'
    first_date, last_date = getStartEndDates(DATA_FOLDER + 'cordoba.csv')
    start_date = min(start_date, first_date)  # in case this is a new city, we start from the very beginning
    for a_date in daterange(start_date, end_date):
        FIELDS = ['Minimum temperature', 'Maximum temperature', 'Relative humidity']
        # To validate that the +360 was ok: 1) gdal_translate a grib to a tif and open qgis
        # with google map as background. 2) use https://www.latlong.net/Show-Latitude-Longitude.html
        # 3) explore.py
        fields_values = extractDailyDataFromGDAS(lat, lon + 360., a_date, GDAS_FOLDER,
                                                 FIELDS, typeOfLevel='heightAboveGround',
                                                 f='03')
        min_T, max_T = np.min(fields_values[FIELDS[0]]), np.max(fields_values[FIELDS[1]])
        mean_T = (min_T + max_T) / 2.
        mean_rh = (np.min(fields_values[FIELDS[2]]) + np.max(fields_values[FIELDS[2]])) / 2.
        precipitation = extractDailyDataFromIMERG(lat, lon, a_date)
        output += a_date.strftime('%Y-%m-%d') + ', ' + ', '.join([
            str(min_T), str(mean_T), str(max_T), str(precipitation), str(mean_rh)
        ]) + ',,' + '\n'
    open(out_filename, 'a').write(output)
def chart_tweetsperday(self):
    self.c.execute('SELECT MIN(timestamp), MAX(timestamp) FROM tweets')
    firstt, lastt = self.c.fetchone()
    x = []
    days = []
    day = {}
    month = 0
    months = []
    x_months = []
    self.c.execute('SELECT timestamp FROM tweets WHERE timestamp > 0')
    for row in self.c:
        t = time.localtime(float(row[0]))
        d = date.fromtimestamp(time.mktime(t)).isoformat()
        if d in day:
            day[d] += 1
        else:
            day[d] = 1
    for single_date in daterange(date.fromtimestamp(firstt), date.fromtimestamp(lastt)):
        days.append(single_date)
        d = single_date.isoformat()
        if d[0:7] != month:
            month = d[0:7]
            months.append(single_date)
            x_months.append([0, 0])
        if d in day:
            x.append(day[d])
            x_months[len(x_months) - 1][0] += day[d]
        else:
            x.append(0)
        x_months[len(x_months) - 1][1] += 1
    days = md.date2num(days)
    months = md.date2num(months)
    x_months = [float(_x[0]) / float(_x[1]) for _x in x_months]

    plt.clf()
    ax = plt.subplot(111)
    plt.plot(days, x)
    plt.title(_("Tweets per day"))
    ax.set_ylabel(_("count"))
    ax.xaxis_date()
    plt.axis([min(days), max(days), 0, max(x) + 5])
    ax.xaxis.set_major_formatter(md.DateFormatter('%m/%Y'))
    labels = ax.get_xticklabels()
    plt.setp(labels, rotation=30, fontsize=10)
    plt.savefig(self.FILEPREFIX + "perday.png")
    self.charts.append(self.FILEPREFIX + "perday.png")

    plt.clf()
    ax = plt.subplot(111)
    plt.plot(months, x_months)
    plt.title(_("Tweets per day (averaged by month)"))
    ax.set_ylabel(_("count"))
    ax.xaxis_date()
    plt.axis([min(months), max(months), 0, max(x_months) + 5])
    ax.xaxis.set_major_formatter(md.DateFormatter('%m/%Y'))
    labels = ax.get_xticklabels()
    plt.setp(labels, rotation=30, fontsize=10)
    plt.savefig(self.FILEPREFIX + "perday.avg.png")
    self.charts.append(self.FILEPREFIX + "perday.avg.png")

    plt.clf()
    ax = plt.subplot(111)
    plt.hist(x, bins=30, color='b')
    plt.title(_("Tweets per day (distribution)"))
    ax.set_xlabel(_("count"))
    ax.set_ylabel(_("days"))
    plt.savefig(self.FILEPREFIX + "perday.hist.png")
    self.charts.append(self.FILEPREFIX + "perday.hist.png")
def runSimulation(account, dateRange, startingDeposit, selector, sampleSize=False,
                  customTickerList=False, preloadToMemory=False, depositAmount=False,
                  depositFrequency=False, comission=10, PrintToTerminal=True):
    '''
    Runs a single simulation. Saves results to a csv file.
    -dateRange must be a 2 element list in the following format: [<start date>, <end date>],
     date format = string "YYYY-MM-DD".
    -depositFrequency is how often (in days) to deposit funds into your trading account.
    -selector is a StockSelectionInterface object.
    -Passing a customTickerList will run the simulation using only the tickers included in the list.
    '''
    # Check for valid parameters
    if depositFrequency and not depositAmount:
        raise ValueError("Deposit frequency set without deposit amount.")
    if depositAmount and not depositFrequency:
        raise ValueError("Deposit amount set without deposit frequency.")

    # Instantiate objects
    if PrintToTerminal:
        print("\nGetting tickers...")
    if customTickerList:
        tickerList = customTickerList
    elif sampleSize:
        tickerList = database.getTickerList(randomize=True, numberOfShuffles=2)[:sampleSize]
    else:
        tickerList = database.getTickerList()

    if preloadToMemory:
        print("Preloading stock data to memory...")
        database.loadDatabaseToMemory(tickerList)

    # Set starting balance and comission
    account.depositFunds(startingDeposit)
    account.setCommision(comission)

    # Extract daterange
    startDate = dateRange[0]
    endDate = dateRange[1]

    # Progress bar header
    if PrintToTerminal:
        print("\nRunning Simulation...\n")
        print("Selector: " + selector.getName())  # NOTE Don't forget to set your self.name property in your selector constructor
        print("Daterange: " + startDate + " to " + endDate)
        print("-------------------------------------------\n")
        sys.stdout.write("\r")
        sys.stdout.write("0.0%")
        sys.stdout.flush()

    daysSinceLastDeposit = 0

    # Begin simulation
    for date in utils.daterange(startDate, endDate):
        # Check if market is open
        if utils.isWeekday(date):
            # Selects which stocks to sell
            ownedStocks = account.getOwnedStocks()
            ownedTickers = []
            for ticker in ownedStocks:
                ownedTickers.append(ticker)
            stocksToSell = selector.selectStocksToSell(ownedTickers, date=date)

            # Sells stocks
            account.placeSellOrders(stocksToSell, date)

            # Selects which stocks to buy
            availibleFunds = account.getBalance()
            numberOfStocksToBuy = selector.numberOfStocksToBuy(availibleFunds)
            stocksToBuy = selector.selectStocksToBuy(numberOfStocksToBuy, date=date,
                                                     customTickerList=tickerList)

            buyOrders = []
            for stock in stocksToBuy:
                ticker = stock[0]
                price = database.getDataframe(ticker, [date, date], ["Open"]).loc[date, "Open"]
                quantity = int((stock[1] * (availibleFunds - (len(stocksToBuy) * comission))) / price)
                if quantity > 0:
                    buyOrders.append([ticker, quantity])

            # Buys stocks
            account.placeBuyOrders(buyOrders, date)

        if depositFrequency:
            daysSinceLastDeposit += 1
            if daysSinceLastDeposit == depositFrequency:
                account.depositFunds(depositAmount)
                daysSinceLastDeposit = 0

        # Progress bar
        if PrintToTerminal:
            completed = utils.getDayDifference(startDate, date)
            totalToDo = utils.getDayDifference(startDate, endDate)
            percentage = int(float(completed * 1000) / (totalToDo - 1)) / 10.0
            sys.stdout.write("\r")
            sys.stdout.write(str(percentage) + "%")
            sys.stdout.flush()

    if PrintToTerminal:
        print("\n")

    # Save logs
    account.saveHistory(selector.getName())
import argparse
import datetime as dt

from utils import daterange, fetch_game_listings


def valid_date(x):
    try:
        return dt.datetime.strptime(x, '%Y-%m-%d')
    except ValueError:
        msg = 'Not a valid date: "{0}"'.format(x)
        raise argparse.ArgumentTypeError(msg)


parser = argparse.ArgumentParser()
parser.add_argument('--start-date', help='First game date to extract',
                    type=valid_date, required=True)
parser.add_argument('--end-date', help='Last game date to extract',
                    type=valid_date, required=False)
parser.add_argument('--refresh', help='Reload the game data, even if the score is final',
                    required=False, action='store_true')
args = parser.parse_args()
print(args)

if args.end_date is None:
    args.end_date = args.start_date

for d in daterange(args.start_date, args.end_date):
    print('Getting listings for {}'.format(d.strftime('%Y-%m-%d')))
    game_ids = fetch_game_listings(d)
    for gid in game_ids:
        # load_game is a task (note the .delay call) defined elsewhere in this project
        load_game.delay(gid, skip_if_final=not args.refresh)
class MyURLOpener(urllib.FancyURLopener):
    def http_error_default(self, *args, **kwargs):
        urllib.URLopener.http_error_default(self, *args, **kwargs)


if __name__ == '__main__':
    parser = utils.incremental_date_range_cmd_line_parser()
    args = parser.parse_args()
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')
    for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                    datetime.date.today()):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            print 'skipping', str_date, 'because not in cmd line arg daterange'
            continue
        directory = str_date
        print str_date
        games_short_name = str_date + '.all.tar.bz2'
        saved_games_bundle = directory + '/' + games_short_name
        if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
            print 'skipping because exists', str_date, saved_games_bundle, \
                'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
            continue
        if not os.path.exists(directory):
            os.mkdir(directory)
        RemoveSmallFileIfExists(saved_games_bundle)
def main(parsed_args):
    """Primary update cycle"""
    # Scrape and load the data from goko, proceeding from the
    # previous day backwards, until no games are inserted
    today = datetime.date.today()
    log.info("Starting scrape for raw games")
    dates = utils.daterange(datetime.date(2010, 10, 14), today, reverse=True)
    for date in dates:
        log.info("Invoking scrape_raw_games async task for %s", date)
        async_result = watch_and_log(background.tasks.scrape_raw_games.s(date))
        inserted = async_result.get()
        if inserted is None:
            log.info("Nothing processed for %s", date)
        elif inserted == 0:
            log.info("No games inserted for %s", date)
            break

    # Invoke the analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Check for goals
    log.info("Starting search for goals achieved")
    # Check for game_stats
    log.info("Starting game_stats summarization")
    dates = utils.daterange(datetime.date(2010, 10, 14), today, reverse=True)
    for date in dates:
        log.info("Invoking calc_goals_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.calc_goals_for_days.s([date]))
        inserted = async_result.get()
        log.info("Invoking summarize_game_stats_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.summarize_game_stats_for_days.s([date]))
        if inserted == 0:
            log.info("No games parsed for goals on %s", date)
            break

    # Invoke the count_buys script
    log.info("Counting buys")
    count_buys.main(parsed_args)

    # Invoke the run_trueskill script
    log.info("Calculating trueskill")
    run_trueskill.main(parsed_args)

    # Invoke the optimal_card_ratios script
    log.info("Calculating optimal card ratios")
    optimal_card_ratios.main(parsed_args)

    # Invoke the goal_stats script
    log.info("Calculating goal stats")
    goal_stats.main(parsed_args)

    # Invoke the scrape_leaderboard script
    log.info("Scraping the leaderboard")
    scrape_leaderboard.main()

    # Invoke the load_leaderboard script
    log.info("Loading the leaderboard")
    load_leaderboard.main()

    log.info("Starting analyze2")
    # This is slow. Is it fast enough on cr?
    analyze2.main(parsed_args)

    log.info("Done with the update.py process")
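# Several of the update/scrape scripts in this collection call
# utils.daterange(start, end, reverse=True) so they can walk backwards from the most
# recent day and stop as soon as a date yields no new games. A sketch of such a helper,
# assuming an end-exclusive range (the exact inclusivity of each project's utils module
# is not shown):
import datetime

def daterange(start_date, end_date, reverse=False):
    """Yield dates in [start_date, end_date); newest-first when reverse=True."""
    offsets = range((end_date - start_date).days)
    if reverse:
        offsets = reversed(offsets)
    for offset in offsets:
        yield start_date + datetime.timedelta(days=offset)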
def valid_date(x):
    try:
        return dt.datetime.strptime(x, '%Y-%m-%d')
    except ValueError:
        msg = 'Not a valid date: "{0}"'.format(x)
        raise argparse.ArgumentTypeError(msg)


parser = argparse.ArgumentParser()
parser.add_argument('--start-date', help='First game date to extract',
                    type=valid_date, required=True)
parser.add_argument('--end-date', help='Last game date to extract',
                    type=valid_date, required=False)
parser.add_argument('--refresh', help='Reload the game data, even if the score is final',
                    required=False, action='store_true')
args = parser.parse_args()
print(args)

if args.end_date is None:
    args.end_date = args.start_date

for d in daterange(args.start_date, args.end_date):
    print('Getting listings for {}'.format(d.strftime('%Y-%m-%d')))
    game_ids = fetch_game_listings(d)
    for gid in game_ids:
        load_game.delay(gid, skip_if_final=not args.refresh)
def CouncilroomGamesCollectionUrl(cur_date):
    host = 'http://councilroom.com/static/scrape_data/'
    return host + FormatDate(COUNCILROOM_FORMAT, cur_date)


def RemoveSmallFileIfExists(fn):
    if os.path.exists(fn) and os.stat(fn).st_size <= SMALL_FILE_SIZE:
        print 'removing small existing file', fn
        os.unlink(fn)


args = parser.parse_args()

for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today()):
    str_date = time.strftime("%Y%m%d", cur_date.timetuple())
    if not utils.IncludesDay(args, str_date):
        print 'skipping', str_date, 'because not in cmd line arg daterange'
        continue
    directory = str_date
    print str_date
    games_short_name = str_date + '.all.tar.bz2'
    saved_games_bundle = directory + '/' + games_short_name
    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
        print 'skipping because exists', str_date, saved_games_bundle, \
            'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
    else:
        if not os.path.exists(directory):
            os.mkdir(directory)
        RemoveSmallFileIfExists(saved_games_bundle)
print("No layer data from {}. Will begin from {} instead".format(datetime.strptime(start_date,"%Y-%m-%d"), datetime.strptime(layer.date_min,"%Y-%m-%d"))) start_date = datetime.strptime(layer.date_min,"%Y-%m-%d") print("From {} to {}".format(start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))) ############################################################################### # File checking ############################################################################### assert os.path.isdir(data_dir), "Couldn't find the dataset at {}".format(data_dir) # Generate labels file if non-existent if not os.path.exists(labels_file): print("Making labels file: {}".format(labels_file)) with open(labels_file, "w") as f: # Loop through dates for single_date in daterange(start_date, end_date): datestring = single_date.strftime("%Y-%m-%d") f.write(datestring + " \n") ############################################################################### # Add split to the labels file ############################################################################### # Count the number of examples in the dataset N = sum(1 for line in open(labels_file)) print("There are {} total examples in the dataset".format(N)) # Split the dataset into train-val-test first_split = int(TRAIN_CUTOFF * N) second_split = int(VAL_CUTOFF * N)
def main(parsed_args):
    """Primary update cycle"""
    # Scrape and load the data from isotropic, proceeding from the
    # current day backwards, until no games are inserted
    log.info("Starting scrape for raw games")
    for date in utils.daterange(datetime.date(2010, 10, 15), datetime.date.today(),
                                reverse=True):
        log.info("Invoking scrape_raw_games async task for %s", date)
        async_result = watch_and_log(background.tasks.scrape_raw_games.s(date))
        inserted = async_result.get()
        if inserted is None:
            log.info("Nothing processed for %s", date)
        elif inserted == 0:
            log.info("No games inserted for %s", date)
            break

    # Invoke the analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Check for goals
    log.info("Starting search for goals achieved")
    for date in utils.daterange(datetime.date(2010, 10, 15), datetime.date.today(),
                                reverse=True):
        log.info("Invoking calc_goals_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.calc_goals_for_days.s([date]))
        inserted = async_result.get()
        if inserted == 0:
            log.info("No games parsed for goals on %s", date)
            break

    # Check for game_stats
    log.info("Starting game_stats summarization")
    for date in utils.daterange(datetime.date(2010, 10, 15), datetime.date.today(),
                                reverse=True):
        log.info("Invoking summarize_game_stats_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.summarize_game_stats_for_days.s([date]))
        inserted = async_result.get()
        if inserted == 0:
            log.info("No new games summarized on %s", date)
            break

    # Invoke the count_buys script
    log.info("Counting buys")
    count_buys.main(parsed_args)

    # Invoke the run_trueskill script
    log.info("Calculating trueskill")
    run_trueskill.main(parsed_args)

    # Invoke the optimal_card_ratios script
    log.info("Calculating optimal card ratios")
    optimal_card_ratios.main(parsed_args)

    # Invoke the goal_stats script
    log.info("Calculating goal stats")
    goal_stats.main(parsed_args)

    # Invoke the scrape_leaderboard script
    log.info("Scraping the leaderboard")
    scrape_leaderboard.main()

    # Invoke the load_leaderboard script
    log.info("Loading the leaderboard")
    load_leaderboard.main()

    log.info("Done with the update.py process")
def chart_mailsperday(self):
    self.c.execute('SELECT MIN(msg_time), MAX(msg_time) FROM mails')
    firstmail, lastmail = self.c.fetchone()
    x_out = []
    x_in = []
    day_out = {}
    day_in = {}
    days = []
    month = 0
    months = []
    x_months_in = []
    x_months_out = []
    mere = re.compile(self.ME)
    self.c.execute('SELECT msg_time, msg_from, msg_to, msgid FROM mails WHERE msg_time > 0')
    for row in self.c:
        t = time.localtime(float(row[0]))
        x = date.fromtimestamp(time.mktime(t))
        d = x.isoformat()
        if mere.match(row[1]) is None or mere.match(row[2]) is not None:
            # Mails to myself shouldn't be counted, they are either spam
            # or "irrelevant" for my communication profile
            if d in day_in:
                day_in[d] += 1
            else:
                day_in[d] = 1
        else:
            if d in day_out:
                day_out[d] += 1
            else:
                day_out[d] = 1
    for single_date in daterange(date.fromtimestamp(firstmail), date.fromtimestamp(lastmail)):
        days.append(single_date)
        d = single_date.isoformat()
        if d[0:7] != month:
            month = d[0:7]
            months.append(single_date)
            x_months_in.append([0, 0])
            x_months_out.append([0, 0])
        if d in day_in:
            x_in.append(day_in[d])
            x_months_in[len(x_months_in) - 1][0] += day_in[d]
        else:
            x_in.append(0)
        if d in day_out:
            x_out.append(day_out[d])
            x_months_out[len(x_months_out) - 1][0] += day_out[d]
        else:
            x_out.append(0)
        x_months_in[len(x_months_in) - 1][1] += 1
        x_months_out[len(x_months_out) - 1][1] += 1
    days = md.date2num(days)
    months = md.date2num(months)
    x_months_out = [float(_x[0]) / float(_x[1]) for _x in x_months_out]
    x_months_in = [float(_x[0]) / float(_x[1]) for _x in x_months_in]

    plt.clf()
    ax = plt.subplot(111)
    plt.plot(days, x_out, 'b', label=_('outgoing'))
    plt.plot(days, x_in, 'g', label=_('incoming'))
    plt.title(_("Mails per day"))
    ax.set_ylabel(_("count"))
    ax.xaxis_date()
    plt.axis([min(days), max(days), 0, max((max(x_in), max(x_out))) + 5])
    ax.xaxis.set_major_formatter(md.DateFormatter('%m/%Y'))
    labels = ax.get_xticklabels()
    plt.legend(loc=0)
    plt.setp(labels, rotation=30, fontsize=10)
    plt.savefig(self.FILEPREFIX + "perday.png")
    self.charts.append(self.FILEPREFIX + "perday.png")

    plt.clf()
    ax = plt.subplot(111)
    plt.plot(months, x_months_out, 'b', label=_('outgoing'))
    plt.plot(months, x_months_in, 'g', label=_('incoming'))
    plt.title(_("Mails per day (averaged by month)"))
    ax.set_ylabel(_("count"))
    ax.xaxis_date()
    plt.axis([min(months), max(months), 0, max((max(x_months_in), max(x_months_out))) + 5])
    ax.xaxis.set_major_formatter(md.DateFormatter('%m/%Y'))
    labels = ax.get_xticklabels()
    plt.legend(loc=0)
    plt.setp(labels, rotation=30, fontsize=10)
    plt.savefig(self.FILEPREFIX + "perday.avg.png")
    self.charts.append(self.FILEPREFIX + "perday.avg.png")

    plt.clf()
    ax = plt.subplot(111)
    plt.hist([x_out, x_in], bins=100, color=['b', 'g'],
             label=[_('outgoing'), _('incoming')], histtype='barstacked', log=False)
    plt.title(_("Mails per day (distribution)"))
    ax.set_xlabel(_("count"))
    ax.set_ylabel(_("days"))
    plt.xlim(0, 50)
    plt.legend(loc=0)
    plt.savefig(self.FILEPREFIX + "perday.hist.png")
    self.charts.append(self.FILEPREFIX + "perday.hist.png")
def apply_trace(figure, before, date, code, row, col, rows, cols, layout, options):
    args = create_args(code)

    end = date
    start = utils.to_format(utils.to_datetime(end) - utils.relativeterm(int(before)))

    simulator_data = strategy.load_simulator_data(
        args.code, start, end, args,
        names=["average"] if options["use_multi"] else [])

    df = simulator_data.middle
    df = df[df["date"] >= start]
    df = df.reset_index()

    # bullish candles -> candle.data[1], bearish candles -> candle.data[0]
    candle = FF.create_candlestick(df["open"], df["high"], df["low"], df["close"],
                                   dates=df["date"])
    stocks = list(candle.data)

    figure = add_stats(figure, row, col, stocks, df, [])  # apply the candlesticks to the chart

    # list of dates
    dates = list(map(lambda x: utils.to_datetime(x), df["date"].astype(str)))
    rangebreaks = [{
        "values": list(filter(lambda x: not utils.is_weekday(x),
                              utils.daterange(dates[0], dates[-1])))
    }]  # exclude non-trading days
    daterange = [dates[0], dates[-1]]

    layout_settings = create_layout_settings(options)

    if options["use_multi"]:
        setting = layout_settings[0]
        setting["line"] = [setting["line"][0]]  # candle with average
        layout_settings = [setting]

    # ===================================================================
    # Fill each subplot with data based on layout_settings
    domain_from = 0
    for setting in sorted(layout_settings, key=lambda x: x["to"]):
        i = row if options["use_multi"] else setting["id"]
        label_i = ((col - 1) + (row - 1) * cols) + 1 if options["use_multi"] else setting["id"]

        # In multi mode the same legend entry would repeat for every subplot,
        # so only show it for the first one
        showlegend = layout["showlegend"] and row == 1 and col == 1 if options["use_multi"] else layout["showlegend"]

        figure = apply_fig_setting(figure, i, col, df, setting["bar"],
                                   mode="bar", showlegend=showlegend)
        figure = apply_fig_setting(figure, i, col, df, setting["line"],
                                   mode="lines", showlegend=showlegend)
        figure = apply_fig_setting(figure, i, col, df, setting["marker"],
                                   mode="markers", showlegend=showlegend)

        xlabel = "xaxis%s" % label_i
        layout[xlabel] = {
            "range": daterange,
            "rangebreaks": rangebreaks,
            "scaleanchor": "x1"
        }

        if not options["use_multi"]:
            ylabel = "yaxis%s" % label_i
            layout[ylabel] = {"domain": [domain_from, setting["to"]]}
            layout[xlabel]["domain"] = [0.0, 1.0]
            domain_from = setting["to"]

    return figure, layout