def read_ifs_csv(filename, commentlines=10):
    """Return tuple: data (by obs), dates, comments.

    Very basic reader for IFS CSV files.  Only reads sequential valid
    observations: rows containing "n.a." are skipped until the first
    complete row is seen, and reading stops at the first "n.a." row
    after that.

    :param filename: path of the CSV file to read.
    :param commentlines: number of leading rows treated as comments.  The
        first cell of each comment row is the key, the remaining cells
        are the per-series values.
    :returns: ``(data, dates, comments)``.  ``data`` is a list of rows of
        floats, ``dates`` holds ``datestr2date`` applied to the first
        cell of each data row, and ``comments`` is one dict per series.
        All three are empty when the file has no complete data row.
    :raises StopIteration: if the file has fewer than ``commentlines`` rows.
    """
    with open(filename) as fh:
        reader = csv.reader(fh)
        # get comments
        commentdict = dict()
        for _ in range(commentlines):
            row = next(reader)  # works on Python 2.6+ and Python 3
            commentdict[row[0]] = row[1:]
        # get data and dates
        data = list()
        dates = list()
        startdata = False
        for row in reader:
            if "n.a." in row:
                if not startdata:
                    continue
                else:
                    break
            startdata = True
            # Build a real list: on Python 3 map() is lazy and has no len().
            vals = [float(cell) for cell in row[1:]]
            dates.append(datestr2date(row[0]))
            data.append(vals)
    # make comments by series
    nseries = len(data[0]) if data else 0
    comments = [dict() for _ in range(nseries)]
    for key in commentdict:
        if len(commentdict[key]) == nseries:
            for i in range(nseries):
                comments[i][key] = commentdict[key][i]
        else:
            logging.info("Missing comments? %s", commentdict[key])
    return data, dates, comments
def draw(self, temperature_by_hour):
    """Plot temperatures against their timestamps.

    ``temperature_by_hour`` maps ISO-format timestamp strings to
    temperature values.  The keys are parsed with
    ``datetime.datetime.fromisoformat`` and both sequences are handed to
    ``self.plot.draw`` in the mapping's iteration order.
    """
    readings = list(temperature_by_hour.items())
    dates = [datetime.datetime.fromisoformat(stamp) for stamp, _ in readings]
    temperatures = [value for _, value in readings]
    self.plot.draw(dates, temperatures)
def calculateTableOfConsumption(meter_id, dateStart=dt.datetime(2011,5,12), dateEnd = dt.datetime(2011,5,16), strict=True):
    """Build a day-by-circuit table of daily energy for one meter.

    The circuits come from ``getCircuitsForMeter(meter_id)``.  For every
    day from ``dateStart`` through ``dateEnd`` (stepping one day at a
    time) and every circuit, ``getDailyEnergyForCircuit`` is called and
    its result stored in the table.

    Returns ``(dates, circuit_id, data)``: the list of days visited, the
    circuit ids, and a numpy array with one row per day and one column
    per circuit.
    """
    circuit_id = getCircuitsForMeter(meter_id)

    # One row per day in the inclusive range, one column per circuit.
    day_count = (dateEnd - dateStart).days + 1
    data = np.zeros((day_count, len(circuit_id)))

    one_day = dt.timedelta(days=1)
    dates = []
    current = dateStart
    row = 0
    while current <= dateEnd:
        dates.append(current)
        for col, cid in enumerate(circuit_id):
            data[row, col] = getDailyEnergyForCircuit(
                cid, current, verbose=0, method='max',
                requireMonotonic=True, reportThreshold=12,
                monotonicThreshold=-1, strict=strict)
        current += one_day
        row += 1
    return dates, circuit_id, data
def parse_source(self, fh):
    """Parse source into header, dates, and data.

    Every line before the first line starting with ``DATE`` is collected
    into ``self.header``.  Each later line is split on whitespace: the
    first field is parsed as a ``YYYY-MM-DD`` date and the second as a
    float.  Lines whose date cannot be parsed are skipped; a missing or
    unparsable value is stored as nan so dates and data stay aligned.

    :param fh: iterable of text lines (e.g. an open file).

    Sets ``self.dates`` and ``self.data`` (and ``self.header`` once the
    ``DATE`` line has been seen).
    """
    header_lines = []
    dates = []
    data = []
    header = True
    for line in fh:
        if header:
            if line.startswith('DATE'):
                header = False
                self.header = ''.join(header_lines)
            else:
                header_lines.append(line)
            continue  # header lines and the DATE line carry no data
        # no longer header -> process the date & datum lines
        fields = line.strip().split()
        try:
            # e.g. 2005-09-11 -> [2005, 9, 11]
            thedate = [int(xi) for xi in fields[0].split('-')]
            parsed = datetime.date(*thedate)
        except (IndexError, ValueError, TypeError, OverflowError):
            if not fields:
                logging.info("Empty line skipped.")
            else:
                logging.warning(
                    "Date field not a date; line skipped. Contents:\n%s",
                    fields)
            continue
        dates.append(parsed)
        try:
            data.append(float(fields[1]))
        except (IndexError, ValueError):
            logging.warning("Missing value set to nan.")
            data.append(float("nan"))
    self.dates = dates
    self.data = data
def read_ifs_csv(filename, commentlines=10):
    """Return tuple: data (by obs), dates, comments.

    Very basic reader for IFS CSV files.  Only reads sequential valid
    observations: rows containing "n.a." are skipped until the first
    complete row is seen, and reading stops at the first "n.a." row
    after that.

    :param filename: path of the CSV file to read.
    :param commentlines: number of leading rows treated as comments.  The
        first cell of each comment row is the key, the remaining cells
        are the per-series values.
    :returns: ``(data, dates, comments)``.  ``data`` is a list of rows of
        floats, ``dates`` holds ``datestr2date`` applied to the first
        cell of each data row, and ``comments`` is one dict per series.
        All three are empty when the file has no complete data row.
    :raises StopIteration: if the file has fewer than ``commentlines`` rows.
    """
    with open(filename) as fh:
        reader = csv.reader(fh)
        # get comments
        commentdict = dict()
        for _ in range(commentlines):
            row = next(reader)  # works on Python 2.6+ and Python 3
            commentdict[row[0]] = row[1:]
        # get data and dates
        data = list()
        dates = list()
        startdata = False
        for row in reader:
            if "n.a." in row:
                if not startdata:
                    continue
                else:
                    break
            startdata = True
            # Build a real list: on Python 3 map() is lazy and has no len().
            vals = [float(cell) for cell in row[1:]]
            dates.append(datestr2date(row[0]))
            data.append(vals)
    # make comments by series
    nseries = len(data[0]) if data else 0
    comments = [dict() for _ in range(nseries)]
    for key in commentdict:
        if len(commentdict[key]) == nseries:
            for i in range(nseries):
                comments[i][key] = commentdict[key][i]
        else:
            logging.info("Missing comments? %s", commentdict[key])
    return data, dates, comments
def get_tdata(tdeviceId, starttime, endtime):
    """Load the temperature readings of one device within a time window.

    Reads at most 30000 rows from the ``temperatures`` table of the
    SQLite database named by the module-level ``dbfile``, ordered by
    timestamp.

    :param tdeviceId: value compared against the ``device`` column.
    :param starttime: lower bound, passed through SQLite ``datetime()``.
    :param endtime: upper bound, passed through SQLite ``datetime()``.
    :returns: ``(dates, temperatures, mintempdate, mintemp, maxtempdate,
        maxtemp, avgtemp)``.  ``dates`` are ``date2num`` values and
        ``avgtemp`` is always the string ``"NA"``.  When no row matches,
        the min/max fields keep their historical placeholder values
        (``sys.float_info.max``, 100, 0, -100).
    """
    # Bound parameters instead of string formatting, so the arguments can
    # never be interpreted as SQL.  They are bound as text, which matches
    # the quoted literals the query used previously.
    cmd = (u"SELECT timestamp, temperature FROM temperatures "
           u"where device = ? and timestamp>=datetime(?) "
           u"AND timestamp<=datetime(?) order by timestamp limit 30000")
    params = (u"{0}".format(tdeviceId),
              u"{0}".format(starttime),
              u"{0}".format(endtime))
    conn = sqlite3.connect(dbfile)
    try:
        curs = conn.cursor()
        curs.execute(cmd, params)
        rows = curs.fetchall()
    finally:
        # "with connection" only manages the transaction; close explicitly.
        conn.close()

    dates = []
    temperatures = []
    mintemp = maxtemp = None
    mintempdate = maxtempdate = None
    for row in rows:
        dt = date2num(
            datetime.datetime.strptime(row[0], u"%Y-%m-%d %H:%M:%S.%f"))
        dates.append(dt)
        temp = float(row[1])
        temperatures.append(temp)
        # Track extremes from the data itself; fixed start values of
        # 100 / -100 gave wrong answers for readings outside that range.
        if mintemp is None or temp < mintemp:
            mintemp = temp
            mintempdate = dt
        if maxtemp is None or temp > maxtemp:
            maxtemp = temp
            maxtempdate = dt
    if not rows:
        # Keep the placeholder values callers received before.
        mintemp, maxtemp = 100, -100
        mintempdate, maxtempdate = sys.float_info.max, 0
    avgtemp = "NA"
    return dates, temperatures, mintempdate, mintemp, maxtempdate, maxtemp, avgtemp
def loadDataFromSqllite(filename, date_from, date_to):
    """Load USD trades strictly between two dates from a SQLite file.

    :param filename: path of the SQLite database containing ``trades``.
    :param date_from: exclusive lower bound for the ``date`` column.
    :param date_to: exclusive upper bound for the ``date`` column.
    :returns: dict with the lists ``prices``, ``volumes`` and ``dates``
        plus ``date_from`` / ``date_to``, the smallest and largest date
        actually seen (9999999999999 and 0 when no row matched).

    Rows with a price above 1000 are reported on stdout (trade id, then
    price) and left out of the lists; they still count towards the
    actual date range.
    """
    db = sqlite3.connect(filename)
    try:
        cursor = db.cursor()
        # Bound parameters keep the bounds from being parsed as SQL.
        data = cursor.execute(
            "select amount,price,date,tid from trades where "
            "( (date>?) and (date<?) and (currency='USD') )",
            (date_from, date_to))
        actual_date_from = 9999999999999
        actual_date_to = 0
        volumes = []
        prices = []
        dates = []
        for row in data:
            volume = float(row[0])
            price = float(row[1])
            date = int(row[2])
            if actual_date_from > date:
                actual_date_from = date
            if actual_date_to < date:
                actual_date_to = date
            if price > 1000:
                # Single-argument print() behaves the same on Python 2 and 3.
                print(int(row[3]))
                print(price)
                continue
            volumes.append(volume)
            prices.append(price)
            dates.append(date)
        cursor.close()
    finally:
        db.close()
    return {
        "prices": prices,
        "volumes": volumes,
        "dates": dates,
        "date_from": actual_date_from,
        "date_to": actual_date_to,
    }
def graph_ET_results(date_time, evapotranspiration, title="Evapotranspiration", ylabel='mm', xlabel='Date', verbose=True):
    """Graph PET results and return the sorted unique dates of the series.

    key arguments:
    date_time = pandas series of Timestamp objects
    evapotranspiration = PET estimates corresponding with Timestamps
    title = title for graph (default is "Evapotranspiration")
    ylabel = y axis label for graph (default is 'mm')
    xlabel = x axis label for graph (default is 'Date')
    verbose(bool) if True, displays graph (default is true)

    The timestamps are reduced to calendar dates, de-duplicated and
    sorted; that list is what gets plotted on the x axis and returned.
    """
    unique_days = set()
    for position in range(len(date_time)):
        unique_days.add(datetime.datetime.date(date_time.iloc[position]))
    dates = sorted(unique_days)
    dates_plot = matplotlib.dates.date2num(dates)

    if not verbose:
        return dates

    plt.figure(figsize=(15, 10))
    plt.plot_date(dates_plot, np.asarray(evapotranspiration), '-',
                  ydate=False)
    plt.title(title)
    plt.xticks(rotation='vertical')
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.show()
    return dates
def closing_info(ticker='GOOG',days=5):
    """Fetch closing prices for ``ticker`` from the Alpha Vantage daily API.

    Takes the first ``days`` entries of the 'Time Series (Daily)' mapping
    in the response, in the order the response lists them, and returns
    them reversed.

    Returns ``(dates, closing_list)``: dates formatted as ``dd/mm`` and
    the matching '4. close' values as floats.
    """
    API_KEY = generate_api_key()
    url = ('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol='
           + ticker + '&outputsize=full&apikey=' + API_KEY)
    results = requests.get(url).json()

    dates = []
    closing_list = []
    for position, day in enumerate(results['Time Series (Daily)']):
        if position == days:
            break
        parsed = datetime.datetime.strptime(day, '%Y-%m-%d')
        dates.append(parsed.strftime('%d/%m'))
        closing_list.append(
            float(results['Time Series (Daily)'][day]['4. close']))

    # Flip both lists so they run in the opposite order to the response.
    dates.reverse()
    closing_list.reverse()
    return dates,closing_list
def generate_histogram(user_activity_ddict,xlabel,ylabel):
    """Draw one bar chart per feature on a 3x3 grid and save the figure.

    ``user_activity_ddict`` maps each feature to a dict with 'x' (date
    strings in ``%Y-%m-%d`` form) and 'y' (bar heights).  The figure is
    shown, saved as 'AverageTweetsPerHashtags.png' at 600 dpi and closed.
    ``xlabel`` and ``ylabel`` are accepted but not used by the body.
    """
    import numpy as np
    figure = plt.figure(figsize=(6 * 3.13, 4 * 3.13))
    plt.axes()
    bar_width = 0.8
    for slot, feature in enumerate(user_activity_ddict, 1):
        ax = plt.subplot(3, 3, slot)
        day_strings = user_activity_ddict[feature]['x']
        dates = [datetime.strptime(text, "%Y-%m-%d").date()
                 for text in day_strings]
        heights = user_activity_ddict[feature]['y']
        ax.bar(range(len(dates)), heights, width=bar_width,
               label=feature.decode('utf8'))
        # Tick positions are shifted by half a bar width.
        ax.set_xticks(np.arange(len(dates)) + bar_width / 2)
        ax.set_xticklabels(dates, rotation=60)
        ax.legend(loc='upper right', prop={'size': 10})
    plt.show()
    figure.savefig('AverageTweetsPerHashtags.png', dpi=(600))
    plt.close()
def _save_price_plots(symbol, prices, dates, hours, line_xlabel, announce):
    """Save the line chart and the candlestick chart for one currency."""
    title = "Evolution of the price in the last " + str(hours) + " h"
    stem = "./" + symbol.lower()

    fig, ax = plt.subplots()
    ax.plot(dates, prices)
    ax.set_title(title)
    plt.xlabel(line_xlabel)
    plt.ylabel(symbol + " price")
    fig.autofmt_xdate()
    ax.fmt_xdata = mdates.DateFormatter('%H:%M')
    plt.grid()
    plt.savefig(stem + ".png")
    plt.clf()

    fig, ax = plt.subplots()
    opens, closes, highs, lows, _ = get_candlesticks(prices, dates, 2 * hours)
    candlestick(ax, opens, closes, highs, lows, width=0.6)
    if announce:
        print("Candlesticks for " + symbol + " plotted")
    ax.set_title(title)
    plt.xlabel("Step = " + str(2 * hours) + " points")
    plt.ylabel(symbol + " price")
    plt.grid()
    plt.savefig(stem + "_candlestick.png")
    plt.clf()


def generate_plot(name,hours):
    """Plot the recent BTC or ETH price history from ./history.txt.

    Each line of the history file is ``timestamp,btc_price,eth_price``
    with the timestamp in ``%Y-%m-%d %H:%M:%S.%f`` form.  Lines are read
    from the end of the file backwards until one is older than ``hours``
    hours (that line is still included) or the file is exhausted.

    :param name: 'BTC' or 'ETH'; any other value produces no plot.
    :param hours: size of the time window in hours.

    Writes ``./btc.png`` and ``./btc_candlestick.png`` (or the ``eth``
    equivalents).
    """
    dates = []
    btc = []
    eth = []
    with open('./history.txt', 'r') as history_file:
        lines = history_file.readlines()

    n = 0
    date = datetime.now()
    # Stop at the start of the file as well: lines[-n] used to raise
    # IndexError when the history was shorter than the requested window.
    while n < len(lines) and (datetime.now() - date).total_seconds() < hours * 3600:
        n = n + 1
        line = lines[-n].split(",")
        date = datetime.strptime(line[0], "%Y-%m-%d %H:%M:%S.%f")
        dates.append(date)
        btc.append(float(line[1]))
        eth.append(float(line[2]))

    if name == 'BTC':
        _save_price_plots('BTC', btc, dates, hours, "Date", True)
    if name == 'ETH':
        _save_price_plots('ETH', eth, dates, hours, "Step", False)
def add_series(self, y, y_label, color, smoothing_window=0, max_y=0):
    """Add one series to the plot; every series after the first gets its own y axis.

    :param y: y values.  If the length differs from ``self.x``, the
        longer of the two is truncated (``self.x`` is modified in place
        of the attribute) and a warning is printed.
    :param y_label: label of the y axis, also used in the warning.
    :param color: colour of the series, its axis label and tick labels.
    :param smoothing_window: when > 0, each point is replaced by the
        average of ``y[i - w:i + w]``; the result is ``2 * w`` shorter.
    :param max_y: upper y limit of the bar chart; 0 means 5% above the
        largest value.

    When the x values are strings they are parsed as ``%Y-%m`` (7
    characters) or ``%Y-%m-%d`` dates and drawn as bars; otherwise a line
    plot is drawn.  ``self.offset`` is incremented at the end.
    """
    if len(self.x) != len(y):
        # One pre-formatted string prints identically on Python 2 and 3.
        print("Warning %s x values in plot but %s y values (%s), truncating"
              % (len(self.x), len(y), y_label))
        if len(self.x) < len(y):
            y = y[:len(self.x)]
        elif len(y) < len(self.x):
            self.x = self.x[:len(y)]

    if smoothing_window > 0:
        y = [numpy.average(y[i - smoothing_window:i + smoothing_window])
             for i in range(smoothing_window, len(y) - smoothing_window)]

    if self.offset > 0:
        # Extra series get a twin axis whose spine is pushed to the right.
        ax2 = self.ax.twinx()
        self.fig.subplots_adjust(right=0.8)
        ax2.spines["right"].set_position(
            ("axes", 1 + (self.offset - 1.0) * 0.15))
        PlotXY._make_patch_spines_invisible(ax2)
        PlotXY._make_spine_invisible(ax2, "right")
    else:
        ax2 = self.ax

    # plot this with string values on the x-axis
    if isinstance(self.x[0], str):
        if len(self.x[0]) == 7:
            fmt = "%Y-%m"
            # we have to be small enough so that feb doesn't overlap,
            # i.e. 28 days
            bar_width = 27.0 / self.num_series
        else:
            fmt = "%Y-%m-%d"
            # 1.0 is one day
            bar_width = 0.95 / self.num_series
        shift = bar_width * self.offset
        # x may need to be truncated because of smoothing, exactly as in
        # the line-plot branch; bar() needs equally long x and y.
        dates = [
            matplotlib.dates.date2num(datetime.datetime.strptime(x_i, fmt))
            + shift
            for x_i in self.x[:len(y)]
        ]
        ax2.bar(dates, y, color=color, width=bar_width, linewidth=0)
        ax2.xaxis_date()
        if max_y == 0:
            max_y = max(y) * 1.05
        ax2.set_ylim(0, max_y)
    else:
        # x may need to be truncated because of smoothing
        ax2.plot(self.x[:len(y)], y, color=color)
        ax2.axis([0, max(self.x), min(y) * 0.95, max(y) * 1.05])

    ax2.set_ylabel(y_label, color=color)
    # Axes.get_axes() only returned the axes itself and no longer exists
    # in current matplotlib; call grid() on the axes directly.
    ax2.grid()
    for tl in ax2.get_yticklabels():
        tl.set_color(color)
    self.offset += 1
def splitfunc(birddata):
    """Split each record into its date text and its integer move count.

    The date is everything before the first single space of the record;
    the move count is the third whitespace-separated field.

    Returns ``(dates, moves)`` as two parallel lists.
    """
    parsed = [
        (record.split(' ')[0], int(record.split()[2]))
        for record in birddata
    ]
    dates = [day for day, _ in parsed]
    moves = [count for _, count in parsed]
    return dates, moves
def _process_trades(self, trade_data):
    """Aggregate raw trades into fixed-width OHLCV periods.

    Trade data should be (dates, prices, amounts), with dates given as
    epoch seconds.  Periods are ``self.granularity`` seconds wide and
    start at the first trade.  A period without trades repeats the
    previous close as open/close/high/low with zero volume.

    The result is stored in ``self.quotes`` as an array with the columns
    (date number, open, close, high, low, volume), then ``self.save()``
    is called and ``self.refreshed_signal`` is emitted with the quotes.
    """
    # np.float was only an alias of the builtin float and has been
    # removed from NumPy; the builtin produces the same float64 arrays.
    epochs = np.array(trade_data[0], dtype=np.int32)
    prices = np.array(trade_data[1], dtype=float)
    amounts = np.array(trade_data[2], dtype=float)

    dates = list()
    opens = list()
    closes = list()
    highs = list()
    lows = list()
    volumes = list()

    period_start = epochs[0]
    period_end = period_start + self.granularity
    stop = epochs[-1]
    # NOTE(review): with "<", trades stamped exactly at the final period
    # boundary (including a lone single trade) produce no candle - confirm
    # whether that is intended.
    while period_start < stop:
        # Drop everything older than the current period.
        remaining_index = epochs >= period_start
        epochs = epochs[remaining_index]
        prices = prices[remaining_index]
        amounts = amounts[remaining_index]

        trim_index = epochs < period_end
        dates.append(matplotlib.dates.epoch2num(period_start))
        if trim_index.any():
            price_selection = prices[trim_index]
            amount_selection = amounts[trim_index]
            opens.append(price_selection[0])
            closes.append(price_selection[-1])
            highs.append(price_selection.max())
            lows.append(price_selection.min())
            volumes.append(amount_selection.sum())
        else:
            # No trade in this period: carry the last close forward.
            last_price = closes[-1]
            opens.append(last_price)
            closes.append(last_price)
            highs.append(last_price)
            lows.append(last_price)
            volumes.append(0)

        period_start = period_end
        period_end += self.granularity

    # Matplotlib likes this array the other way around,
    # so it gets transposed()
    self.quotes = np.array((dates, opens, closes, highs, lows, volumes),
                           dtype=float).transpose()
    self.save()
    self.refreshed_signal.emit(self.quotes)
def table_content(edm, item_info, redirect=True, max_entries=float("inf")): """ Return contents to fill a table using html_table function Args: item_info (dict) -- a dict of all infos to display results of redirect (bool) -- if True a ('Link', list(links)) is returned among with dates and sizes max_entries (int or float) -- max number of entries to append to data list Returns: [('Date', dates), ('Size', sizes)] (and links if redirect=True) dates and sizes (list) -- all dates and sizes of given item_info associated with a color. If it's a bad item first non zero element of dates, sizes and links will be red e.g. dates = [('2018-03-03T1212', 'red'), ('2018-03-04T1345', '')] sizes = [(2.3, 'red'), (0.45, '')] Note that if colors are removed from the lists, then dict(zip(dates, sizes)) will be equivalent to get_runs(db, item_info) TODO: change links when moving to lxplus """ dates, sizes, links = [], [], [] ordered = edm.last_to_first(item_info) entries = 0 out_of_range_colour = 'red' \ if edm.is_red_item(item_info) else '' for date in ordered: if entries < max_entries: dates.append((date, out_of_range_colour)) sizes.append((ordered[date], out_of_range_colour)) ref_abs = edm.test_to_archive(item_info, date) ref_relative = ref_abs[ref_abs.find('ART/'):] for ref in glob.glob(os.path.join(ref_abs, '*')): alias = os.path.basename(ref) ref = os.path.join(ref_relative, alias) links.append((make_link('../../archive/' + ref, alias), out_of_range_colour)) out_of_range_colour = '' entries += 1 dates = ('Date', dates) sizes = ('Size', sizes) links = ('Link', links) if redirect: return [dates, sizes, links] elif not redirect: return [dates, sizes]
def getHistData(numSamples):
    """Return the latest ``numSamples`` rows of DHT_data, oldest first.

    The rows are read newest-first through the module-level cursor
    ``curs`` and then reversed.

    :param numSamples: number of rows to fetch; coerced to ``int``.
    :returns: ``(dates, temps, hums)`` - the first column, the second
        column converted with ``* 1.8 + 32`` (Celsius to Fahrenheit,
        assuming the stored value is Celsius), and the third column.
    :raises ValueError: if ``numSamples`` is not an integer-like value.
    """
    # Coerce to int before building the statement so that nothing but a
    # number can ever reach the SQL text.
    limit = int(numSamples)
    curs.execute(
        "SELECT * FROM DHT_data ORDER BY timestamp DESC LIMIT " + str(limit))
    data = curs.fetchall()
    dates = []
    temps = []
    hums = []
    for row in reversed(data):
        dates.append(row[0])
        temps.append(row[1] * 1.8 + 32)
        hums.append(row[2])
    return dates, temps, hums
def __init__(self, user_id):
    """Plot the first five weight readings of ``user_id`` by date.

    Runs the query through the module-level ``db`` object, then shows a
    line-and-marker plot of weight against reading date with a grid.
    """
    db.execute(
        "SELECT weight, rdate FROM weight WHERE user_id=%s ORDER BY rdate ASC LIMIT 5",
        (user_id, ))
    result = db.fetchall()

    dates = []
    values = []
    for weight, rdate in ((record[0], record[1]) for record in result):
        dates.append(rdate)
        values.append(weight)

    plt.plot(dates, values, 'o-')
    plt.grid()
    plt.show()
def __init__(self, user_id):
    """Plot the first five glucose/ketone ratios of ``user_id`` by date.

    Runs the query through the module-level ``db`` object, then shows a
    line-and-marker plot of the ratio against reading date with a grid.
    """
    db.execute(
        "SELECT ((glucose)/(ketones)) AS gki, rdate FROM readings WHERE user_id=%s ORDER BY rdate ASC LIMIT 5",
        (user_id, ))
    result = db.fetchall()

    dates = []
    values = []
    for gki, rdate in ((record[0], record[1]) for record in result):
        dates.append(rdate)
        values.append(gki)

    plt.plot(dates, values, 'o-')
    plt.grid()
    plt.show()
def add_series(self, y, y_label, color, smoothing_window=0, max_y=0):
    """Add one series to the plot; every series after the first gets its own y axis.

    :param y: y values.  If the length differs from ``self.x``, the
        longer of the two is truncated (``self.x`` is modified in place
        of the attribute) and a warning is printed.
    :param y_label: label of the y axis, also used in the warning.
    :param color: colour of the series, its axis label and tick labels.
    :param smoothing_window: when > 0, each point is replaced by the
        average of ``y[i - w:i + w]``; the result is ``2 * w`` shorter.
    :param max_y: upper y limit of the bar chart; 0 means 5% above the
        largest value.

    When the x values are strings they are parsed as ``%Y-%m`` (7
    characters) or ``%Y-%m-%d`` dates and drawn as bars; otherwise a line
    plot is drawn.  ``self.offset`` is incremented at the end.
    """
    if len(self.x) != len(y):
        # One pre-formatted string prints identically on Python 2 and 3.
        print("Warning %s x values in plot but %s y values (%s), truncating"
              % (len(self.x), len(y), y_label))
        if len(self.x) < len(y):
            y = y[:len(self.x)]
        elif len(y) < len(self.x):
            self.x = self.x[:len(y)]

    if smoothing_window > 0:
        y = [numpy.average(y[i - smoothing_window:i + smoothing_window])
             for i in range(smoothing_window, len(y) - smoothing_window)]

    if self.offset > 0:
        # Extra series get a twin axis whose spine is pushed to the right.
        ax2 = self.ax.twinx()
        self.fig.subplots_adjust(right=0.8)
        ax2.spines["right"].set_position(
            ("axes", 1 + (self.offset - 1.0) * 0.15))
        PlotXY._make_patch_spines_invisible(ax2)
        PlotXY._make_spine_invisible(ax2, "right")
    else:
        ax2 = self.ax

    # plot this with string values on the x-axis
    if isinstance(self.x[0], str):
        if len(self.x[0]) == 7:
            fmt = "%Y-%m"
            # we have to be small enough so that feb doesn't overlap,
            # i.e. 28 days
            bar_width = 27.0 / self.num_series
        else:
            fmt = "%Y-%m-%d"
            # 1.0 is one day
            bar_width = 0.95 / self.num_series
        shift = bar_width * self.offset
        # x may need to be truncated because of smoothing, exactly as in
        # the line-plot branch; bar() needs equally long x and y.
        dates = [
            matplotlib.dates.date2num(datetime.datetime.strptime(x_i, fmt))
            + shift
            for x_i in self.x[:len(y)]
        ]
        ax2.bar(dates, y, color=color, width=bar_width, linewidth=0)
        ax2.xaxis_date()
        if max_y == 0:
            max_y = max(y) * 1.05
        ax2.set_ylim(0, max_y)
    else:
        # x may need to be truncated because of smoothing
        ax2.plot(self.x[:len(y)], y, color=color)
        ax2.axis([0, max(self.x), min(y) * 0.95, max(y) * 1.05])

    ax2.set_ylabel(y_label, color=color)
    # Axes.get_axes() only returned the axes itself and no longer exists
    # in current matplotlib; call grid() on the axes directly.
    ax2.grid()
    for tl in ax2.get_yticklabels():
        tl.set_color(color)
    self.offset += 1
def makeGraph(date):
    """Plot one day's sensor log and save the figure to ``graphfile``.

    Reads ``logpath + date + ".log"``, a file of fixed-width records, and
    draws temperature, humidity and plant moisture on the left axis and
    air pressure on the right axis against the time of day.

    :param date: file-name stem of the log to read.
    """
    dates = []
    temp = []
    humi = []
    pres = []
    moist = []
    # Read the data out of the log file; "with" closes it even on a
    # malformed record.
    with open(logpath + date + ".log", 'r') as f:
        for line in f:
            # NOTE(review): the slice is 13 characters wide while the
            # format describes 14 digits - confirm against the log layout.
            dates.append(datetime.strptime(line[0:13], "%Y%m%d%H%M%S"))
            temp.append(float(line[25:29]))
            humi.append(float(line[32:36]))
            pres.append(float(line[16:22]))
            moist.append(float(line[40:43]))
    # Convert the timestamps to matplotlib date numbers.
    x = mplt.dates.date2num(dates)
    # Create the figure and the two y axes.
    fig, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
    plt.ylim([0, 80])  # y scale of ax1
    ax2 = ax1.twinx()
    plt.ylim([960, 1030])  # y scale of ax2
    # Show the x axis as hour:minute.
    timeFmt = mplt.dates.DateFormatter('%H:%M')
    ax1.xaxis.set_major_formatter(timeFmt)
    # Draw the series with time on the x axis.
    ax1.plot_date(x, temp, 'g', xdate=True, label="Temparature")  # green
    ax1.plot_date(x, humi, 'k', xdate=True, label="Humidity")  # black
    ax1.plot_date(x, moist, 'r', xdate=True, label="Plant Moisture")  # red
    ax2.plot_date(x, pres, 'b', xdate=True, label="Pressure")  # blue
    # Horizontal reference lines.
    ax1.axhline(y=25, color='k', linestyle='--')
    ax1.axhline(y=50, color='k', linestyle='--')
    ax2.axhline(y=1013, color='m', linestyle='--')
    ax1.legend(loc='lower left')  # legend positions
    ax2.legend(loc='lower right')
    ax1.set_xlabel('time')
    ax1.set_ylabel('temp(DegC)/humidity(%RH)/\nplanter moisture(%RH)', color='k')
    ax2.set_ylabel('air pressure(hPa)', color='b')
    # The title carries today's month/day (not the ``date`` argument).
    ax1.set_title(time.strftime('%m/%d') + ' HIMAWARI.chan ', fontsize=25)
    fig.savefig(graphfile)
def fetch_data(db_c, game_nwords, username):
    """Load a user's scores for games of a given size, oldest first.

    Executes the query on the cursor ``db_c`` and converts each ISO date
    string to a matplotlib date number.

    Returns ``(data_x, data_y)``: float64 arrays of date numbers and of
    the matching ``num_correct`` values.
    """
    query = ("SELECT date, num_correct FROM games"
             " WHERE user=? AND num_total=?"
             " ORDER BY date ASC")
    db_c.execute(query, [username, game_nwords])

    x_values = []
    y_values = []
    for stamp, correct in db_c.fetchall():
        moment = dateutil.parser.isoparse(stamp)
        x_values.append(matplotlib.dates.date2num(moment))
        y_values.append(correct)

    return (np.array(x_values, dtype=np.float64),
            np.array(y_values, dtype=np.float64))
def plotData(self):
    """Plot call and put options against their dates and show the figure."""
    pyplot = matplotlib.pyplot
    # One pass over the records: (date number, call, put) per record.
    points = [
        (matplotlib.dates.datestr2num(entry.getDate()),
         entry.getCall(),
         entry.getPut())
        for entry in self._alldata
    ]
    dates = [point[0] for point in points]
    calls = [point[1] for point in points]
    puts = [point[2] for point in points]

    pyplot.plot_date(dates, calls, 'b', label='Call Options')
    pyplot.plot_date(dates, puts, 'r', label='Put Options')
    pyplot.legend(loc=2)
    pyplot.xlabel('Dates')
    pyplot.ylabel('Call Options & Put Options')
    pyplot.show()
def readData(filename):
    ''' (file) -> list of str, list of float

    Read a whitespace-separated file and return two parallel lists: the
    first column (dates, kept as strings) and the second column (square
    metre prices) as floats.  Blank lines are ignored.

    Raises IndexError if a non-blank line has fewer than two fields and
    ValueError if the price is not a number.
    '''
    dates = []
    prices = []
    # "with" closes the file even when a line fails to parse.
    with open(filename, 'r') as f:
        for line in f:
            lineData = line.split()
            if not lineData:
                continue  # a trailing or stray blank line is not data
            dates.append(lineData[0])
            prices.append(float(lineData[1]))
    return dates, prices
def getBPG(npoints=0, autoupdate=False):
    """Plot the ``value_bpg`` pressure readings on a logarithmic y axis.

    :param npoints: when > 0, only the newest ``npoints`` rows are read
        (coerced to ``int``); otherwise every row is read.
    :param autoupdate: when true, switch pylab to interactive mode,
        redraw the canvas instead of blocking in ``show()``, print
        "autoupdate" and sleep five seconds.

    Returns None.  Nothing is plotted when the query returns no rows.
    """
    cmd = "SELECT value_bpg, timestamp FROM pressure ORDER BY timestamp DESC"
    if npoints > 0:
        # int() guarantees only a number is appended to the statement.
        cmd += " limit " + str(int(npoints))
    db = dsu.openSQL(dbName)
    try:
        cur = db.cursor()
        try:
            cur.execute(cmd)
            resp = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()

    if not resp:
        # dates[-1] - dates[0] below would raise IndexError on no data.
        print("No pressure data to plot")
        return

    # interactive mode on
    if autoupdate:
        pylab.ion()

    vals = [float(rr[0]) for rr in resp]
    dates = [rr[1] for rr in resp]

    figure = pylab.figure(1)
    pylab.subplot(111)
    ax = figure.gca()

    # NOTE(review): rows arrive newest first, so this difference is zero
    # or negative - confirm what format_line_ticks expects.
    timeRange = dates[-1] - dates[0]
    year_range = timeRange.days / 365.

    ax.semilogy(dates, vals, 'k.')
    dpu.format_line_ticks(ax, year_range)

    if autoupdate:
        figure.canvas.draw()
    else:
        pylab.show()
    if autoupdate:
        print("autoupdate")
        time.sleep(5.0)
def plot_katrina_correlation(ca=None, em=7):
    """Compare Katrina's best track with the derived track and save the figure.

    Runs the individual classification analysis for 2005 (creating a
    ``ClassificationAnalysis`` when ``ca`` is not given) and, for every
    match whose best track is Katrina, pairs the best-track pressure and
    wind with the derived minimum pressure (divided by 100) and maximum
    wind speed on each date that has a derived pressure.  Pressure is
    drawn in the upper panel, wind speed in the lower one, and the
    figure is saved as 'katrina_best_derived_comparison.png'.
    """
    fig = plt.figure()
    _, katrina = IbtracsData().load_wilma_katrina()

    if not ca:
        ca = ClassificationAnalysis()
    _, matches, _ = ca.run_individual_cla_analysis(2005, em)

    dates = []
    pressure_pairs = []
    wind_pairs = []
    for match in matches:
        if match.best_track.name != katrina.name:
            continue
        track = match.best_track
        derived = match.cyclone
        for date, bt_pres, bt_wind in zip(track.dates, track.pressures,
                                          track.winds):
            if date in derived.pmins and derived.pmins[date]:
                dates.append(date)
                pressure_pairs.append((bt_pres, derived.pmins[date] / 100.))
                wind_pairs.append((bt_wind, derived.max_windspeeds[date]))

    # Column 0 holds the best-track values, column 1 the derived ones.
    pressures = np.array(pressure_pairs)
    winds = np.array(wind_pairs)

    labelx = -0.1
    legend_font = {'size': 10}

    ax = plt.subplot(211)
    plt.plot_date(dates, pressures[:, 0], 'b-', label='best track')
    plt.plot_date(dates, pressures[:, 1], 'b--', label='derived track')
    plt.ylabel('pressure (hPa)')
    plt.legend(bbox_to_anchor=(1.07, 1.16), numpoints=1, prop=legend_font)
    plt.setp(ax.get_xticklabels(), visible=False)
    ax.yaxis.set_label_coords(labelx, 0.5)

    ax = plt.subplot(212)
    plt.plot_date(dates, winds[:, 0], 'r-', label='best track')
    plt.plot_date(dates, winds[:, 1], 'r--', label='derived track')
    plt.ylabel('max. wind speed (ms$^{-1}$)')
    plt.legend(bbox_to_anchor=(1.07, 1.07), numpoints=1, prop=legend_font)
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
    ax.yaxis.set_label_coords(-0.09, 0.5)

    fig.set_size_inches(6.3, 5)
    _save_figure('katrina_best_derived_comparison.png')
def getBPG(npoints=0, autoupdate=False):
    """Plot the ``value_bpg`` pressure readings on a logarithmic y axis.

    :param npoints: when > 0, only the newest ``npoints`` rows are read
        (coerced to ``int``); otherwise every row is read.
    :param autoupdate: when true, switch pylab to interactive mode,
        redraw the canvas instead of blocking in ``show()``, print
        "autoupdate" and sleep five seconds.

    Returns None.  Nothing is plotted when the query returns no rows.
    """
    cmd = "SELECT value_bpg, timestamp FROM pressure ORDER BY timestamp DESC"
    if npoints > 0:
        # int() guarantees only a number is appended to the statement.
        cmd += " limit " + str(int(npoints))
    db = dsu.openSQL(dbName)
    try:
        cur = db.cursor()
        try:
            cur.execute(cmd)
            resp = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()

    if not resp:
        # dates[-1] - dates[0] below would raise IndexError on no data.
        print("No pressure data to plot")
        return

    # interactive mode on
    if autoupdate:
        pylab.ion()

    vals = [float(rr[0]) for rr in resp]
    dates = [rr[1] for rr in resp]

    figure = pylab.figure(1)
    pylab.subplot(111)
    ax = figure.gca()

    # NOTE(review): rows arrive newest first, so this difference is zero
    # or negative - confirm what format_line_ticks expects.
    timeRange = dates[-1] - dates[0]
    year_range = timeRange.days / 365.

    ax.semilogy(dates, vals, 'k.')
    dpu.format_line_ticks(ax, year_range)

    if autoupdate:
        figure.canvas.draw()
    else:
        pylab.show()
    if autoupdate:
        print("autoupdate")
        time.sleep(5.0)
def main(csv,query,cutoff):
    """Rank the rows of a CSV by similarity to ``query`` and plot the top ones.

    :param csv: path of the input CSV; the columns ``description``,
        ``location``, ``user`` and ``time`` are read from it.
    :param query: query text, converted with ``nlp``.
    :param cutoff: number of best-scoring rows to keep.

    The score of a row is the sum of the ``nlp`` similarities of its
    hashtag text, description and ``image`` column (presumably added by
    ``appendims`` - confirm) to the query.  The locations and user names
    of the kept rows are written to 'locations.txt' and 'usernames.txt',
    and the scores are plotted against time into 'graph.png'.
    """
    query = nlp(query)
    data = pd.read_csv(csv)
    appendims(data)
    data["hashtag"] = data["description"].apply(lambda s: str2hash(str(s)))
    data["description"] = data["description"].apply(lambda s: (str(s)))
    data["sort"] = (
        data["hashtag"].apply(lambda s: nlp(s).similarity(query))
        + data["description"].apply(lambda s: nlp(s).similarity(query))
        + data["image"].apply(lambda s: nlp(s).similarity(query))
    )
    # sort_values returns a new frame; the result has to be kept, or the
    # cutoff picks the first rows of the file instead of the best ones.
    data = data.sort_values(by='sort', ascending=False)
    data = data.iloc[:cutoff]

    locdata = data["location"].tolist()
    userdata = data["user"].tolist()
    with open('locations.txt', 'w') as filehandle:
        for listitem in locdata:
            filehandle.write('%s\n' % listitem)
    with open('usernames.txt', 'w') as filehandle:
        for listitem in userdata:
            filehandle.write('%s\n' % listitem)

    lfratio = data["sort"].tolist()
    stamps = data["time"].tolist()
    dates = []
    for stamp in stamps:
        # assumes the text starts with YYYY-MM-DD - TODO confirm
        y = int(stamp[0:4])
        m = int(stamp[5:7])
        d = int(stamp[8:10])
        # Rough day count (365-day years, 30-day months) for the x axis.
        dates.append((y * 365) + (m * 30) + d)
    plt.plot_date(dates, lfratio)
    plt.savefig("graph.png")
    return


with open('/content/blah.txt', 'r') as filehandle:
    query = filehandle.readline()
    cutoff = int(filehandle.readline())
main('~/input.csv', query, cutoff)
def get_temperature():
    """Return the daily minimum and maximum of the 3-hourly forecast.

    The forecast for the module-level ``place`` is fetched through
    ``mgr``; every entry is assigned to the UTC date of its reference
    time, and its 'temp' value (in ``unit_c``) updates that day's
    minimum and maximum.

    :returns: ``(days, temp_min, temp_max)``, three parallel lists with
        one element per distinct date in order of first appearance.
    """
    days = []
    temp_min = []
    temp_max = []
    forecaster = mgr.forecast_at_place(place, '3h')
    for weather in forecaster.forecast:
        date = datetime.utcfromtimestamp(weather.reference_time()).date()
        if date not in days:
            days.append(date)
            temp_min.append(None)
            temp_max.append(None)
        temperature = weather.temperature(unit_c)['temp']
        # Compare against None explicitly: a stored extreme of 0 degrees
        # is falsy and used to be overwritten by the next reading.
        if temp_min[-1] is None or temperature < temp_min[-1]:
            temp_min[-1] = temperature
        if temp_max[-1] is None or temperature > temp_max[-1]:
            temp_max[-1] = temperature
    return (days, temp_min, temp_max)
def get_dates(self, beginning: str, days_forword: int) -> list:
    """
    Description:
        Generates a list of working days starting at the beginning date,
        omitting US holidays and weekends.

    Parameters
    ----------
    beginning : str
        date (``YYYY-MM-DD``) from which dates will be generated; it is
        included when it is itself a working day
    days_forword : int
        number of working days to return

    Returns
    -------
    List of ``datetime.date`` objects, ``days_forword`` long
    """
    dates = []
    day = datetime.datetime.strptime(beginning, "%Y-%m-%d").date()
    # Query the holiday calendar directly instead of a key list built for
    # the current year only: the calendar fills in whichever year is
    # asked for, so ranges in or across other years are filtered too.
    us_holidays = holidays.US()
    while len(dates) < days_forword:
        if day.weekday() < 5 and day not in us_holidays:
            dates.append(day)
        day += datetime.timedelta(days=1)
    return dates
def loadDataFromSqllite(filename, date_from, date_to):
    """Load USD trades strictly between two dates from a SQLite file.

    :param filename: path of the SQLite database containing ``trades``.
    :param date_from: exclusive lower bound for the ``date`` column.
    :param date_to: exclusive upper bound for the ``date`` column.
    :returns: dict with the lists ``prices``, ``volumes`` and ``dates``
        plus ``date_from`` / ``date_to``, the smallest and largest date
        actually seen (9999999999999 and 0 when no row matched).

    Rows with a price above 1000 are reported on stdout (trade id, then
    price) and left out of the lists; they still count towards the
    actual date range.
    """
    db = sqlite3.connect(filename)
    try:
        cursor = db.cursor()
        # Bound parameters keep the bounds from being parsed as SQL.
        data = cursor.execute(
            "select amount,price,date,tid from trades where "
            "( (date>?) and (date<?) and (currency='USD') )",
            (date_from, date_to))
        actual_date_from = 9999999999999
        actual_date_to = 0
        volumes = []
        prices = []
        dates = []
        for row in data:
            volume = float(row[0])
            price = float(row[1])
            date = int(row[2])
            if actual_date_from > date:
                actual_date_from = date
            if actual_date_to < date:
                actual_date_to = date
            if price > 1000:
                # Single-argument print() behaves the same on Python 2 and 3.
                print(int(row[3]))
                print(price)
                continue
            volumes.append(volume)
            prices.append(price)
            dates.append(date)
        cursor.close()
    finally:
        db.close()
    return {
        "prices": prices,
        "volumes": volumes,
        "dates": dates,
        "date_from": actual_date_from,
        "date_to": actual_date_to,
    }
def getAll(npoints=0):
    """Plot the bpg, cdg and convectron pressure readings against time.

    :param npoints: when > 0, only the newest ``npoints`` rows are read
        (coerced to ``int``); otherwise every row is read.

    Returns None.  Nothing is plotted when the query returns no rows.
    """
    cmd = "SELECT value_bpg, value_cdg, value_convectron, timestamp FROM pressure ORDER BY timestamp DESC"
    if npoints > 0:
        # int() guarantees only a number is appended to the statement.
        cmd += " limit " + str(int(npoints))
    db = dsu.openSQL(dbName)
    try:
        cur = db.cursor()
        try:
            cur.execute(cmd)
            resp = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()

    if not resp:
        # dates[-1] - dates[0] below would raise IndexError on no data.
        print("No pressure data to plot")
        return

    bpgs = [float(rr[0]) for rr in resp]
    cdgs = [float(rr[1]) for rr in resp]
    convs = [float(rr[2]) for rr in resp]
    dates = [rr[3] for rr in resp]

    figure = pylab.figure(1)
    pylab.subplot(111)
    ax = figure.gca()

    # NOTE(review): rows arrive newest first, so this difference is zero
    # or negative - confirm what format_line_ticks expects.
    timeRange = dates[-1] - dates[0]
    year_range = timeRange.days / 365.

    ax.plot(dates, bpgs, 'k.')
    ax.plot(dates, cdgs, 'r.')
    ax.plot(dates, convs, 'g.')
    dpu.format_line_ticks(ax, year_range)
    pylab.show()
def getAll(npoints=0):
    """Plot the bpg, cdg and convectron pressure readings against time.

    :param npoints: when > 0, only the newest ``npoints`` rows are read
        (coerced to ``int``); otherwise every row is read.

    Returns None.  Nothing is plotted when the query returns no rows.
    """
    cmd = "SELECT value_bpg, value_cdg, value_convectron, timestamp FROM pressure ORDER BY timestamp DESC"
    if npoints > 0:
        # int() guarantees only a number is appended to the statement.
        cmd += " limit " + str(int(npoints))
    db = dsu.openSQL(dbName)
    try:
        cur = db.cursor()
        try:
            cur.execute(cmd)
            resp = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()

    if not resp:
        # dates[-1] - dates[0] below would raise IndexError on no data.
        print("No pressure data to plot")
        return

    bpgs = [float(rr[0]) for rr in resp]
    cdgs = [float(rr[1]) for rr in resp]
    convs = [float(rr[2]) for rr in resp]
    dates = [rr[3] for rr in resp]

    figure = pylab.figure(1)
    pylab.subplot(111)
    ax = figure.gca()

    # NOTE(review): rows arrive newest first, so this difference is zero
    # or negative - confirm what format_line_ticks expects.
    timeRange = dates[-1] - dates[0]
    year_range = timeRange.days / 365.

    ax.plot(dates, bpgs, 'k.')
    ax.plot(dates, cdgs, 'r.')
    ax.plot(dates, convs, 'g.')
    dpu.format_line_ticks(ax, year_range)
    pylab.show()
def process_data(ticker):
    """Extract the inventory, assets and liabilities series of a ticker.

    ``load_obj(ticker)`` is expected to return a mapping from a label to
    a sequence of ``(value, date_text)`` pairs.  The dates are taken from
    the first label's sequence and parsed with ``"%b. %d, %Y"``.

    Returns ``(products, dates)``.  ``products`` maps "one", "two" and
    "three" to the values of a label matching "Inventor", "assets" and
    "liabilities" respectively (the last matching label wins); a key is
    absent when no label matches.
    """
    data = load_obj(ticker)
    index = list(data.keys())
    raw = [data[label] for label in index]

    date_texts = [pair[1] for pair in raw[0]]

    products = {}
    for label, series in zip(index, raw):
        products[label] = [pair[0] for pair in series]

    dates = [datetime.strptime(text, "%b. %d, %Y") for text in date_texts]

    quick_components = {
        "one": "Inventor",
        "two": "assets",
        "three": "liabilities"
    }
    for short_name, pattern in quick_components.items():
        matcher = re.compile(r"{}".format(pattern))
        for label in index:
            if matcher.search(label) is not None:
                products[short_name] = products[label]

    # Drop the original labels so only the short names remain.
    for label in index:
        del products[label]
    return products, dates
def parse_source(self, fh):
    """Parse source into header, dates, and data.

    Every line before the first line starting with ``DATE`` is collected
    into ``self.header``.  Each later line is split on whitespace: the
    first field is parsed as a ``YYYY-MM-DD`` date and the second as a
    float.  Lines whose date cannot be parsed are skipped; a missing or
    unparsable value is stored as nan so dates and data stay aligned.

    :param fh: iterable of text lines (e.g. an open file).

    Sets ``self.dates`` and ``self.data`` (and ``self.header`` once the
    ``DATE`` line has been seen).
    """
    header_lines = []
    dates = []
    data = []
    header = True
    for line in fh:
        if header:
            if line.startswith('DATE'):
                header = False
                self.header = ''.join(header_lines)
            else:
                header_lines.append(line)
            continue  # header lines and the DATE line carry no data
        # no longer header -> process the date & datum lines
        fields = line.strip().split()
        try:
            # e.g. 2005-09-11 -> [2005, 9, 11]
            thedate = [int(xi) for xi in fields[0].split('-')]
            parsed = datetime.date(*thedate)
        except (IndexError, ValueError, TypeError, OverflowError):
            if not fields:
                logging.info("Empty line skipped.")
            else:
                logging.warning(
                    "Date field not a date; line skipped. Contents:\n%s",
                    fields)
            continue
        dates.append(parsed)
        try:
            data.append(float(fields[1]))
        except (IndexError, ValueError):
            logging.warning("Missing value set to nan.")
            data.append(float("nan"))
    self.dates = dates
    self.data = data
def tranformForcastData(self):
    """Reshape the raw forecast response into parallel lists for plotting.

    Reads ``self.data_forecast`` (the 'city'/'timezone' offset and the
    'list' of forecast entries) and replaces it with a dict of the lists
    'dates', 'temps', 'humidity', 'rain' and 'snow', one element per
    entry.  Dates are the entry time shifted by the city's timezone
    offset; rain and snow are the '3h' amounts multiplied by 0.0393701
    (millimetres to inches), or 0 when an entry has none.

    :raises KeyError: if an entry lacks 'dt', 'main' or 'clouds' data.
    """
    dates = []
    temps = []
    humidity = []
    clouds = []
    rain = []
    snow = []
    # timezone offset used to display the local time of the place
    timezone = self.data_forecast['city']['timezone']
    for entry in self.data_forecast['list']:
        # date and time used to plot graph
        dates.append(datetime.utcfromtimestamp(entry['dt'] + timezone))
        temps.append(entry['main']['temp'])
        humidity.append(entry['main']['humidity'])
        # NOTE(review): clouds is collected but not part of the result.
        clouds.append(entry['clouds']['all'])
        # Every entry must contribute one rain and one snow value so the
        # lists stay aligned with dates; a missing or malformed amount
        # counts as zero.  Only those lookup failures are caught.
        try:
            rain.append(entry['rain']['3h'] * 0.0393701)
        except (KeyError, TypeError):
            rain.append(0)
        try:
            snow.append(entry['snow']['3h'] * 0.0393701)
        except (KeyError, TypeError):
            snow.append(0)
    # update data_forecast for plotting graphs
    self.data_forecast = {
        'dates': dates,
        'temps': temps,
        'humidity': humidity,
        'rain': rain,
        'snow': snow
    }
def main(show=False, outfile=None):
    """Plot a per-station data-quality matrix and a stations-per-day curve.

    Availability records are fetched for every day returned by
    ``build_movstack_datelist`` and grouped by "net.sta".  For each record
    considered (see the component test below) a quality value in [0, 1] is
    written into a (station x day) matrix, which is drawn as an image above
    a line plot of the number of stations flagged on each day.

    :param show: when true, ``plt.show()`` is called at the end.
    :param outfile: when set, the figure is saved to this path.  If it starts
        with "?", every "?" in it is replaced by a timestamped default name.
    """
    db = connect()
    start, end, datelist = build_movstack_datelist(db)
    dates = []
    stations = []
    duration = []
    component = []
    for day in datelist:
        # Query the whole day, from 00:00:00 to 23:59:59.
        daystart = datetime.datetime.combine(day, datetime.time(0, 0, 0))
        dayend = datetime.datetime.combine(day, datetime.time(23, 59, 59))
        data = get_data_availability(db, starttime=daystart, endtime=dayend)
        for di in data:
            stations.append("%s.%s" % (di.net, di.sta))
            dates.append(di.starttime)
            # NOTE(review): `dur` is never read; the same expression is
            # recomputed on the next line.
            dur=di.data_duration-abs(di.gaps_duration)
            duration.append((di.data_duration-abs(di.gaps_duration)))
            component.append(di.comp)

    # One row per availability record, indexed by its start time.
    data = pd.DataFrame({"stations": stations, "Duration": duration, "Components": component}, index=dates)
    data = data.groupby('stations')

    llen = (end-start).days + 1
    ngroups = len(data.groups.keys())
    matrix = np.zeros((ngroups, llen))    # quality per station/day
    matrix2 = np.zeros((ngroups, llen))   # 1 where a station/day was filled in
    start = datetime.datetime.combine(start, datetime.time(0, 0, 0))
    minv=1.0  # smallest quality seen; used as the colour-scale minimum

    for i, group in enumerate(sorted(data.groups.keys())):
        new = True
        for di in data.groups[group]:
            if new:
                # The flag is only cleared here; nothing else happens on the
                # first record.
                new = False
            # Only handle index labels whose last component entry ends in 'Z'.
            # NOTE(review): `.shape>0` compares a tuple with an int - confirm
            # the intended check (probably "has at least one row").
            if data.get_group(group)['Components'][di][-1][-1:]=='Z' and data.get_group(group)['Duration'][di].shape>0:
                try:
                    durat=data.get_group(group)['Duration'][di]
                    # print durat, durat.shape
                    # Quality is the first duration as a fraction of 86400
                    # (presumably seconds per day), capped at 1.
                    if durat[0]>86400:
                        quality=1
                    else:
                        quality=durat[0]/86400
                    dt = (di-start).days
                except:
                    # NOTE(review): if the failure happens before `dt` is
                    # assigned, the writes below reuse the previous record's
                    # `dt` (or raise NameError on the very first one).
                    quality=0
                matrix[i, dt] = quality
                matrix2[i, dt] = 1
                if minv>quality:
                    minv=quality

    gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1])

    plt.figure(figsize=(11.8, 8.4))
    # Upper panel: quality image, one row per station.
    ax = plt.subplot(gs[0])
    plt.imshow(matrix, interpolation="none", aspect='auto', cmap='RdYlBu', vmin=minv, vmax=1, extent=(date2num(start), date2num(end), 0, ngroups), origin='lower')

    # Centre each station label on its row.
    plt.yticks(np.arange(ngroups)+0.5, sorted(data.groups.keys()))
    ax.xaxis.set_major_locator( matplotlib.dates.MonthLocator())
    ax.xaxis.set_major_formatter( matplotlib.dates.DateFormatter('%Y-%m-%d') )
    plt.gcf().autofmt_xdate()
    plt.grid()

    # Lower panel: number of flagged stations per day.
    ax = plt.subplot(gs[1])
    plt.plot(datelist, np.sum(matrix2, axis=0))
    plt.ylabel('N stations')
    plt.gcf().autofmt_xdate()
    plt.grid()
    if outfile:
        if outfile.startswith("?"):
            now = datetime.datetime.now()
            now = now.strftime('data availability on %Y-%m-%d %H.%M.%S')
            outfile = outfile.replace('?', now)
        print "output to:", outfile
        plt.savefig(outfile)
    if show:
        plt.show()
def printHugeMessageTable(startDate = dt.datetime(2011, 5, 24), endDate = dt.datetime(2011, 6, 1)): import datetime as dt circuit = session.query(Circuit).all() print len(circuit) clist = [c.id for c in circuit] clist.sort() numCol = max(clist) + 1 #print clist numRow = (endDate - startDate).days * 25 import numpy as np report = np.zeros((numRow, numCol)) print report.shape dates = [] originalQuery = session.query(PrimaryLog) start = startDate i = 0 while 1: end = start + dt.timedelta(hours=1) thisQuery = originalQuery # deal with double report problem #if start.hour != 23: if 1: # take reports in the hour between start and end thisQuery = thisQuery.filter(PrimaryLog.date >= start) thisQuery = thisQuery.filter(PrimaryLog.date < end) #thisQuery = thisQuery.filter(PrimaryLog.date == endDate) cclist = [tq.circuit_id for tq in thisQuery] cclist.sort() # add to numpy array report[i,cclist] = 1 dates.append(start) i += 1 # output to screen print start, print "".join([str(x).ljust(3) if x in cclist else ' - ' for x in clist]) ''' else: # change report range to prevent including the 23:59:59 report in the 23:00:00 row lastReportTime = dt.datetime(start.year, start.month, start.day, start.hour, 59, 59) thisQuery = thisQuery.filter(PrimaryLog.date > start) thisQuery = thisQuery.filter(PrimaryLog.date < lastReportTime) cclist = [tq.circuit_id for tq in thisQuery] cclist.sort() # add to numpy array report[i,cclist] = 1 dates.append(start) i += 1 # output to screen print start, print "".join([str(x).ljust(3) if x in cclist else ' - ' for x in clist]) # end of day report thisQuery = originalQuery thisQuery = thisQuery.filter(PrimaryLog.date == lastReportTime) cclist = [tq.circuit_id for tq in thisQuery] cclist.sort() # add to numpy array report[i,cclist] = 1 dates.append(lastReportTime) i += 1 # output to screen print lastReportTime, print "".join([str(x).ljust(3) if x in cclist else ' - ' for x in clist]) ''' start = start + dt.timedelta(hours=1) if start >= endDate: break
def Day_plot(title, y_list, selection, header, region_select, xaxis, yaxis):
    '''
    Plot daily values, one curve per region, with a 25th-75th percentile band.

    ``y_list`` is iterated region by region; each region is iterated again
    and every element ``k`` is sliced as ``k[:, 0]`` (x values) and
    ``k[:, 1]`` (y values), so the elements are 2-D arrays.  For each region
    the element-wise mean of the y columns is plotted against "Mon DD" date
    labels, and the area between the 25th and 75th percentiles is shaded.

    title        -- figure title
    y_list       -- per-region collections of 2-D arrays (see above)
    selection    -- not used in this function
    header       -- when not "MeanOfDay", a regression line is also drawn
    region_select-- not used in this function
    xaxis, yaxis -- axis labels

    The original description notes that it was tested displaying all regions
    (region select of 5) and that it works like the monthly plots.
    '''
    # print(data_list)
    # Convert the 0-255 RGB triples from color_select() to 0-1 floats.
    c = color_select()
    for i in range(len(c)):
        r, g, b = c[i]
        c[i] = (r / 255., g / 255., b / 255.)
    print(len(y_list))
    # print(len(y_list[0]))
    plt.rc('font', family='serif')
    plt.rc('xtick', labelsize='x-small')
    plt.rc('ytick', labelsize='x-small')
    plt.figure(figsize=(12, 8))
    plt.title(title, fontsize=12)  # Plotting title from above
    plt.grid()
    count = 0  # index of the current region (colour and label)
    for j in y_list:
        y2_list = []
        y3_list = []
        y = []
        ymin = []
        ymax = []
        x = []
        dates = []
        # plt.xticks(range(1966,2017,5),fontsize=10)
        plt.yticks(fontsize=10)
        # plt.plot(xnew,f2(xnew),color=c[0],label="Average",markevery=100)  # change to "Average"?
        region_lab = "Region %s" % (count + 1)
        for k in j:
            # print(len(j))
            # print(i)
            x_pre = k[:, 0]
            y_temp = k[:, 1]  # plt_index gets its index when variable is chosen by user.
            y2_list.append(y_temp)
        y2_list = np.array(y2_list).tolist()
        # y3_list = y2_list[181:]+y2_list[:181]
        # Element-wise statistics across all arrays of this region.
        y_pre = np.mean(y2_list, axis=0)
        ymin = np.min(y2_list, axis=0)
        ymax = np.max(y2_list, axis=0)
        # per_90 = np.percentile(y2_list,90,axis=0)
        # per_10 = np.percentile(y2_list,10,axis=0)
        regress = linregress(x_pre, y_pre)
        lin_m = regress.slope
        lin_b = regress.intercept
        lin_r = stats.pearsonr(x_pre, y_pre)
        # Rotate each series so it starts at index 243
        # (presumably so the plotted year begins around September - confirm).
        per75 = np.percentile(y2_list, 75, axis=0)
        per75 = np.array(per75).tolist()
        per75 = per75[243:] + per75[:243]
        per25 = np.percentile(y2_list, 25, axis=0)
        per25 = np.array(per25).tolist()
        per25 = per25[243:] + per25[:243]
        x_pre = np.array(x_pre).tolist()
        # print(x_pre)
        # Turn the x values (used as day-of-year numbers) into "Mon DD"
        # labels: the rotated head is parsed in year '01', the tail in '02'.
        for m in x_pre[243:]:
            m = '01' + str(int(m))
            # print(m)
            m = datetime.strptime(m, '%y%j')
            m = m.strftime('%b %d')
            dates.append(m)
        for m in x_pre[:243]:
            m = '02' + str(int(m))
            # print(m)
            m = datetime.strptime(m, '%y%j')
            m = m.strftime('%b %d')
            dates.append(m)
        # print(dates)
        # x = matplotlib.dates.date2num(dates)
        y_pre = np.array(y_pre).tolist()
        y = y_pre[243:] + y_pre[:243]
        # print(dates[0:303])
        plt.plot(dates, y, color=c[count], label=region_lab, linewidth="2", markevery=100)  # change to "Average"?
        if header != "MeanOfDay":
            # NOTE(review): `x` is still the empty list created above and
            # `c[j]` indexes the colour list with the region collection, not
            # with `count` - this branch looks broken; confirm before use.
            plt.plot(x, lin_m * x + lin_b, color=c[j], label="Linear Regression", linewidth="2", linestyle="dashed")  # Regression of average
        plt.fill_between(dates, per25, per75, alpha=0.25, linewidth=0, color=c[count])
        count += 1
    plt.minorticks_on()
    plt.ylabel(yaxis, fontsize=12)  # Needs to be changed for every plot.
    plt.xlabel(xaxis, fontsize=12)
    plt.xticks(range(0, 365, 10), fontsize=10, rotation=45)
    plt.yticks(range(0, 40, 5), fontsize=10)
    plt.xlim(0, 303)
    plt.ylim(0, 40)
    plt.legend(fontsize=10)
def PlotRaces(workouts, fname):
    """Plot race finishing times against date, with a linear trend line.

    A workout counts as a race when ``GetTotalDist()`` is at least
    ``41.0 * 1000`` (presumably metres, i.e. marathon distance - confirm).
    Race times are plotted as dots, a least-squares line is fitted against
    the number of days since the earliest race, and the slope is annotated
    as a per-30-day trend.

    :param workouts: iterable of objects providing ``GetTotalDist()``,
        ``GetTotalTime()``, ``GetStartTime()`` and ``name``.
    :param fname: if truthy, the figure is saved to this path before showing.

    When no workout qualifies the function returns without plotting
    (previously this crashed in the regression / min() calls).
    """
    race_times = []
    dates = []
    for workout in workouts:
        if workout.GetTotalDist() < 41.0 * 1000:
            continue
        race_times.append(workout.GetTotalTime())
        dates.append(workout.GetStartTime())

    if not dates:
        # Nothing to plot or fit.
        return

    pp.figure()
    ax = pp.subplot(111)

    # The legend label is taken from the last workout iterated, as before.
    pp.plot(dates, race_times, "o", markersize=12,  # mfc=COLORS[i],
            label=workout.name, color='k')

    # calculate regression against days elapsed since the earliest race
    first_race = min(dates)  # computed once instead of once per race
    days_diff = np.array([(d - first_race).days for d in dates])
    slope, intercept = st.linregress(days_diff, race_times)[:2]
    regr_y = intercept + days_diff * slope

    # plot regression
    pp.plot(dates, regr_y, "--", color='#555555', lw=1.5, label=None)
    month_slope = 30 * slope
    pp.text(0.35, 0.30,
            r"Trend: %d:%02.d / month" % (int(month_slope / 60),
                                          int(abs(month_slope) % 60)),
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes)

    pp.ylabel("Marathon time", fontsize=FONT_SIZE)
    # tick_params replaces the per-tick `tick.label` loop; that attribute no
    # longer exists in recent matplotlib releases.
    ax.tick_params(axis='both', which='major', labelsize=FONT_SIZE)
    pp.xticks(rotation=75)

    # Pad the date axis by four weeks on each side.
    xmin = min(dates) - datetime.timedelta(weeks=4)
    xmax = max(dates) + datetime.timedelta(weeks=4)
    pp.xlim([xmin, xmax])

    # Fixed time window: 2 * 3600 up to 4 * 3600 + 20 * 60.
    ymin = 2 * 3600
    ymax = 4 * 3600 + 20 * 60
    pp.ylim([ymin, ymax])

    # Keep the plot's on-screen proportions independent of the data ranges.
    lim = (ax.get_xlim()[1] - ax.get_xlim()[0]) /\
          (ax.get_ylim()[1] - ax.get_ylim()[0])
    ax.set_aspect(0.3 * lim)
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(HourMinFormatter))
    ax.set_axisbelow(True)
    pp.grid(ls="-", color="#cccccc")
    pp.tight_layout()
    if fname:
        pp.savefig(fname)
    pp.show()
from matplotlib.dates import date2num
import matplotlib.dates
import numpy as np
import matplotlib.gridspec as gridspec

# Exploratory script: collect the availability records for every day in the
# configured date list and group their start times by "net.sta".
db = connect()
start, end, datelist = build_movstack_datelist(db)
print dir(start)  # debug output: attributes of the start date object

dates = []
stations = []
for day in datelist:
    # print day
    # NOTE(review): starttime and endtime are both `day` here - confirm this
    # selects the whole day in get_data_availability.
    data = get_data_availability(db,starttime=day, endtime=day)
    for di in data:
        stations.append("%s.%s"%(di.net,di.sta))
        dates.append(di.starttime)

# One row per record, indexed by its start time, then grouped by station.
data = pd.DataFrame({"stations":stations},index=dates)
data = data.groupby('stations')

llen = (end-start).days +1           # number of days, both ends included
ngroups = len(data.groups.keys())    # number of distinct stations
matrix = np.zeros((ngroups,llen))    # allocated but not filled in this fragment
start = datetime.datetime.combine(start, datetime.time(0,0,0))

for i, group in enumerate(sorted(data.groups.keys())):
    print group
    new = True
    for di in data.groups[group]:
        # `new` is never cleared here, so every index label is printed.
        if new:
            print group, di
def write_hrrr_grib2txt(date=datetime.datetime.now(),filenum = 24,hour = 0,directory=os.getcwd(),enddirectory=os.getcwd(),loc=[36.605, -97.485], indexes = None,write_modelhours = False):
    """
    Read a series of HRRR files for one location and dump them to a JSON file.

    Two modes:

    * ``write_modelhours`` false: ``hour`` is a single model hour and
      ``filenum`` consecutive hourly files starting at ``date`` are read.
    * ``write_modelhours`` true: ``hour`` is a pair ``[first, last]`` of model
      hours read from the single run at ``date - min(hour)`` hours.

    The file names come from ``produce_hrrr_grib2strings`` and each file found
    in ``directory`` is read with ``read_hrrr_spec``.  The JSON written to
    ``enddirectory`` is the list
    ``[data, dates, parameterlist, loc, indexes, units]``.

    Returns the output file name on success (also when the file already
    exists), ``''`` when no file names were produced or the JSON dump failed,
    and ``None`` when ``hour`` is a list without ``write_modelhours`` or when
    no file could be read.

    NOTE(review): the ``date``, ``directory`` and ``enddirectory`` defaults
    are evaluated once, when the function is defined, not on each call; and
    ``loc`` is a shared mutable default.
    """
    wkdir = os.getcwd()  # remembered so the working directory can be restored

    if ((type(hour) == list) and not write_modelhours):
        print 'error, can only write one model hour at a time if write_modelhours = False'
        return

    newfilename = produce_hrrr_txt_string(date=date,hour=hour,filenum=filenum,loc=loc,indexes =indexes,modelhours=write_modelhours)

    if newfilename in os.listdir(enddirectory):
        print 'error file already exists'
        return newfilename

    if not write_modelhours:
        datestrings = []
        for i in range(filenum):
            # NOTE(review): date.hour+i exceeds 23 for later files unless the
            # range stays within one day; datetime() raises ValueError then.
            datestrings.append(datetime.datetime(date.year,date.month,date.day,date.hour+i))
        hourslists = [[hour] for i in range(filenum)]
    else:
        # One model run, several forecast hours.
        date = date-datetime.timedelta(hours=min(hour))
        filenum = hour[1]-hour[0]+1
        datestrings = [date]
        hourslists = [range(hour[0],hour[1]+1)]

    filelists = produce_hrrr_grib2strings(datestrings,hourslists)

    if filelists == []:
        return ''

    data = []
    dates = []
    # k starts below every index and is pushed past the end after the first
    # successful read, so the metadata below is captured only once.
    k = -1

    for i in range(len(filelists)):
        if filelists[i] in os.listdir(directory):
            x = read_hrrr_spec(filename = filelists[i], directory = directory,no_txt = True,coords=indexes)
            print filelists[i]
            if x != None and i>k:
                k = len(filelists)+1
                parameterlist = x[1]
                loc = x[2]
                indexes = x[3]
                units = x[4]
            if x == None:
                continue
            # Round-trip through numpy to get plain nested lists for JSON.
            x[0] = np.array(x[0])
            x[0] = x[0].tolist()
            data.append(x[0])
            x = None
            # NOTE(review): in the write_modelhours mode datestrings has a
            # single element, so this index looks valid only when exactly one
            # file name is produced - confirm.
            dates.append(matplotlib.dates.date2num(datestrings[i]))

    # 'parameterlist' only exists as a local if at least one file was read.
    if not ('parameterlist' in vars().keys()):
        return

    os.chdir(enddirectory)

    #remove HRRR hours that have missing pressure levels
    # NOTE(review): entries are popped while `i` keeps advancing, so the
    # entry that slides into position i after a pop is not examined; the
    # inner loop also keeps running after a pop - confirm this is intended.
    i = 0
    while i<len(data):
        if data[i] != None:
            for j in data[i]:
                # An ndarray left inside the nested lists marks this hour as
                # one to drop.
                if type(j) == type(np.array([])):
                    try:
                        data.pop(i)
                        dates.pop(i)
                    except IndexError:
                        pass
        else:
            try:
                data.pop(i)
                dates.pop(i)
            except IndexError:
                pass
        i = i+1

    f = open(newfilename, 'w')
    try:
        json.dump([data,dates,parameterlist,loc,indexes,units],f)
    except TypeError:
        print "array found in json export error -> pressure levels missing from some hour"
        # NOTE(review): this path returns without closing `f` and without
        # restoring the original working directory.
        return ''
    f.close()
    os.chdir(wkdir)
    return newfilename
hrrr_hours = [(i+timeshift).hour for i in datetimes] ind = hrrr[2].index('Cloud mixing ratio') hrrr_data = hrrr[0] hrrr_data = np.array(hrrr_data) hrrr_c = hrrr_data[:,ind,:] c_cover = 0 for i in range(hrrr_c.shape[0]): temp = hrrr_c[i,:].max(axis=0) if temp>hrrr_margin: c_cover = c_cover+1 c_fract = float(c_cover)/float(hrrr_c.shape[0]) c_fracts_hrrr.append(c_fract) dates.append(dates[i]) hrrr_hoursets.append(hrrr_hours) #kill storage vars hrrr = None hrrr_data = None hrrr_c = None hrrrf.close() #radar analysis os.chdir(radar_directory) radarf = open(radar_strings[i],'r') radar = json.load(radarf) radar = np.array(radar[0])
def Calculation():
    """Look up the ticker typed into the GUI, fill the output entries and
    plot its recent closing prices.

    Reads the ticker from ``Ticker_entry``, downloads the page returned by
    ``make_url`` and inserts the current price, opening price, last closing
    price and three news headlines into their entry widgets.  It then
    downloads the page from ``make_historical_url``, extracts dates and
    prices from the table body and shows a matplotlib graph.

    Any exception raised after the first page has been parsed is swallowed
    and replaced by "Please enter a valid ticker!" in the ticker entry.
    NOTE(review): the first ``requests.get`` is outside the ``try``, so a
    network failure there propagates instead of showing that message.
    """
    lower_stock = Ticker_entry.get()  # Collect the user input from the entry in the GUI
    stock = lower_stock.upper()  # Upper-case the ticker so the URL is correct even for lower-case input
    url = make_url(stock)  # Build the quote-page URL for this ticker
    page = requests.get(url)  # Request the HTML of that page
    soup = BeautifulSoup(page.content, "lxml")  # Parse the HTML so it can be searched

    try:
        # Finding and inserting the current price
        # The class name identifies where the current price is displayed.
        current_number = soup.find('span', attrs={'class': 'priceText__1853e8a5'})
        current_price = current_number.text  # Text content only, without the HTML tags
        Stock_price_output.insert(0, "$")  # Insert the dollar symbol into the entry
        Stock_price_output.insert(1, current_price)  # Insert the stock price after it

        # Finding and inserting opening price
        # The first element with this class holds the opening price.
        opening_number = soup.find('div', attrs={'class': 'value__b93f12ea'})
        opening_price = opening_number.text  # Text content only, without the HTML tags
        Stock_price_day_output.insert(0, "$")  # Insert the dollar symbol into the entry
        Stock_price_day_output.insert(1, opening_price)  # Insert the opening price after it

        # Finding and inserting last closing price
        # Several elements share this class; the second one is the last closing price.
        closing_numbers = soup.find_all('div', attrs={'class': 'value__b93f12ea'})
        closing_number = closing_numbers[1]
        closing_price = closing_number.text  # Text content only, without the HTML tags
        Last_closing_price_output.insert(0, "$")  # Insert the dollar symbol into the entry
        Last_closing_price_output.insert(1, closing_price)  # Insert the last closing price after it

        # Finding and inserting news
        # The class name identifies where news headlines are displayed.
        news = soup.find_all('div', attrs={'class': 'headline__07dbac92'})
        # NOTE(review): the original comments call these the first, second
        # and third of three headlines, but indices 1..3 skip element 0 and
        # need at least four matches - confirm against the page.
        news_1 = news[1].text
        news_2 = news[2].text
        news_3 = news[3].text
        Stock_news_output1.insert(0, news_1)  # Insert the first headline into the first entry
        Stock_news_output2.insert(0, news_2)  # Insert the second headline into the second entry
        Stock_news_output3.insert(0, news_3)  # Insert the third headline into the third entry

        # Drawing the graph of the stock
        historical_url = make_historical_url(stock)  # URL of the page holding the historical data table
        historical_page = requests.get(historical_url)  # Request the HTML of that page
        soup_2 = BeautifulSoup(historical_page.content, "lxml")  # Parse the HTML so it can be searched
        all_numbers = soup_2.find('tbody')  # The table body, which holds all the historical rows
        all_nums = all_numbers.text  # Text content only, without the HTML tags
        all_nums_1 = all_nums.split()  # Split the single string into a list of whitespace-separated elements
        length = len(all_nums_1)  # Number of elements in the table text
        prices = []  # Closing prices of the stock
        dates = []  # Corresponding dates
        current_time = datetime.datetime.now()  # Today's date, used for the first row
        current_time_format = current_time.strftime("%m/%d/%Y")  # Formatted as mm/dd/yyyy like the other rows
        all_nums_1[0] = current_time_format  # Replace the first element with today's date

        # Each table row is treated as six elements; the closing price is the
        # fifth (offset 4) of each row.
        for t in range(int(length / 6)):
            index = t * 6 + 4
            # NOTE(review): prices stay as strings here; confirm matplotlib
            # plots them numerically rather than as categories.
            prices.append(all_nums_1[index])

        # The date is the first element (offset 0) of each six-element row.
        for t in range(int(length / 6)):
            index = t * 6
            date_str = all_nums_1[index]
            format_str = '%m/%d/%Y'  # The dates are expected as mm/dd/yyyy
            datetime_object = datetime.datetime.strptime(date_str, format_str)  # Parse the text into a datetime
            dates.append(datetime_object)

        final_dates = matplotlib.dates.date2num(dates)  # Convert the dates into matplotlib's numeric format

        # Plot the stock price (the title below says "last 3 months"):
        # dates on the x-axis, prices on the y-axis; '-o' connects the
        # data points.
        plt.plot_date(final_dates, prices, '-o')
        plt.xticks(rotation=90)  # Rotate the x-axis labels by 90 degrees so they are legible
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.suptitle("Price of the %s stock in the last 3 months" % stock)
        plt.show()
    except:
        Ticker_entry.delete(0, END)  # Clear the entry where the message will be displayed
        Ticker_entry.insert(0, "Please enter a valid ticker!")  # Show the error message in the entry box
import matplotlib.dates
import numpy as np
import matplotlib.gridspec as gridspec

# Exploratory script: collect the availability records for every day in the
# configured date list and group their start times by "net.sta".
db = connect()
start, end, datelist = build_movstack_datelist(db)
print dir(start)  # debug output: attributes of the start date object

dates = []
stations = []
for day in datelist:
    # print day
    # NOTE(review): starttime and endtime are both `day` here - confirm this
    # selects the whole day in get_data_availability.
    data = get_data_availability(db,starttime=day, endtime=day)
    for di in data:
        # Each record is unpacked as a 9-tuple; only net, sta and starttime
        # are used below.
        net, sta, comp, starttime, endtime, data_duration, gaps_duration, samplerate, flag = di
        stations.append("%s.%s"%(net,sta))
        dates.append(starttime)

# One row per record, indexed by its start time, then grouped by station.
data = pd.DataFrame({"stations":stations},index=dates)
data = data.groupby('stations')

llen = (end-start).days +1           # number of days, both ends included
ngroups = len(data.groups.keys())    # number of distinct stations
matrix = np.zeros((ngroups,llen))    # allocated but not filled in this fragment
start = datetime.datetime.combine(start, datetime.time(0,0,0))

for i, group in enumerate(sorted(data.groups.keys())):
    print group
    new = True
    for di in data.groups[group]:
        # `new` is never cleared here, so every index label is printed.
        if new:
            print group, di
def main(show=False, outfile=None): db = connect() start, end, datelist = build_movstack_datelist(db) dates = [] stations = [] for day in datelist: daystart = datetime.datetime.combine(day, datetime.time(0, 0, 0)) dayend = datetime.datetime.combine(day, datetime.time(23, 59, 59)) data = get_data_availability(db, starttime=daystart, endtime=dayend) for di in data: stations.append("%s.%s" % (di.net, di.sta)) dates.append(di.starttime) data = pd.DataFrame({"stations": stations}, index=dates) data = data.groupby('stations') llen = (end-start).days + 1 ngroups = len(data.groups.keys()) matrix = np.zeros((ngroups, llen)) start = datetime.datetime.combine(start, datetime.time(0, 0, 0)) for i, group in enumerate(sorted(data.groups.keys())): new = True for di in data.groups[group]: if new: print group, di, new = False dt = (di-start).days matrix[i, dt] = 1 print di gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1]) plt.figure(figsize=(11.8, 8.4)) ax = plt.subplot(gs[0]) plt.imshow(matrix, interpolation="none", aspect='auto', cmap='bwr', vmin=-1, vmax=1, extent=(date2num(start), date2num(end), 0, ngroups), origin='lower') plt.yticks(np.arange(ngroups)+0.5, sorted(data.groups.keys())) ax.xaxis.set_major_locator( matplotlib.dates.MonthLocator()) ax.xaxis.set_major_formatter( matplotlib.dates.DateFormatter('%Y-%m-%d') ) plt.gcf().autofmt_xdate() plt.grid() ax = plt.subplot(gs[1]) plt.plot(datelist, np.sum(matrix, axis=0)) plt.ylabel('N stations') plt.gcf().autofmt_xdate() plt.grid() if outfile: if outfile.startswith("?"): now = datetime.datetime.now() now = now.strftime('data availability on %Y-%m-%d %H.%M.%S') outfile = outfile.replace('?', now) print "output to:", outfile plt.savefig(outfile) if show: plt.show()
import matplotlib.dates
import numpy as np
import matplotlib.gridspec as gridspec

# Exploratory script: collect the availability records for every day in the
# configured date list and group their start times by "net.sta".
db = connect()
start, end, datelist = build_movstack_datelist(db)
print dir(start)  # debug output: attributes of the start date object

dates = []
stations = []
for day in datelist:
    # print day
    # NOTE(review): starttime and endtime are both `day` here - confirm this
    # selects the whole day in get_data_availability.
    data = get_data_availability(db, starttime=day, endtime=day)
    for di in data:
        # Each record is unpacked as a 9-tuple; only net, sta and starttime
        # are used below.
        net, sta, comp, starttime, endtime, data_duration, gaps_duration, samplerate, flag = di
        stations.append("%s.%s" % (net, sta))
        dates.append(starttime)

# One row per record, indexed by its start time, then grouped by station.
data = pd.DataFrame({"stations": stations}, index=dates)
data = data.groupby('stations')

llen = (end - start).days + 1         # number of days, both ends included
ngroups = len(data.groups.keys())     # number of distinct stations
matrix = np.zeros((ngroups, llen))    # allocated but not filled in this fragment
start = datetime.datetime.combine(start, datetime.time(0, 0, 0))

for i, group in enumerate(sorted(data.groups.keys())):
    print group
    new = True
    for di in data.groups[group]:
        # `new` is never cleared here, so every index label is printed.
        if new:
            print group, di
# Load every transaction from all.csv into parallel lists.
# Columns used: 0 = date (dd/mm/YYYY), 1 = code, 2 and 3 = copied into
# `items` as-is, 4 = amount (kept as text in `pounds`).
# The file handle is left open, as before, in case later code still uses it.
allcsv = open("all.csv")
items = []
dates = []
pounds = []
codes = []
rows = []
cReader = csv.reader(allcsv, delimiter=',', quotechar='"')

# Iterate through all transactions
for row in cReader:
    pounds.append(row[4])
    # struct_time[0:5] is (year, month, day, hour, minute)
    d = datetime.datetime(*time.strptime(row[0],"%d/%m/%Y")[0:5])
    items.append([d,row[2],row[3],row[4]])
    dates.append(d)
    codes.append(row[1])
    rows.append(row)

curdate = time.localtime()

# Find index of first date in this month: walk backwards from the last
# transaction while the dates still fall in the current month and year.
# The bound on len(dates) stops the walk when every transaction (or none at
# all) is in the current month; without it the index ran off the list.
firstdatethismonth = 1
while firstdatethismonth <= len(dates):
    d = dates[-firstdatethismonth]
    if curdate.tm_mon == d.month and curdate.tm_year == d.year:
        firstdatethismonth += 1
    else:
        break
# Number of trailing transactions in the current month.
firstdatethismonth-=1