# Convert data types
# The date columns are cast with an explicit unit: current pandas releases
# reject the unit-less 'datetime64' dtype string in astype(), while
# 'datetime64[ns]' is accepted by old and new versions alike.
df.Date = df.Date.astype('datetime64[ns]')
df.StandardDate = df.StandardDate.astype('datetime64[ns]')

# Every remaining calendar column is cast to a plain integer.
int_columns = ('DateSK', 'Day', 'DOWInMonth', 'DayOfYear', 'WeekOfYear',
               'WeekOfMonth', 'Month', 'Quarter', 'Year')
for int_column in int_columns:
    df[int_column] = df[int_column].astype('int')

# Call-form print with a single argument produces the same output on
# Python 2 and Python 3.
print('Data Types')
print(df.dtypes)

# From Excel to DataFrame
from pandas import DataFrame, ExcelFile
import pandas as pd
import json

# Path to excel file
# Your path will be different, please modify the path below.
location = r'//covenas/decisionsupport/meinzer/projects/network/Contract Deliverables for Emanio Reports.xls'
elif strat == 'sell': for k in range(len(df)): #''' Short-only strategy ''' strat_rsi_bb_crossover_2(k, df.Close, buy_symbol, _hard_stop ,avg_trueRange, valid_trading_period, \ rsi_eventArray, rsi_event_id, rsi_action, posSize, posPrc, dynamicPNL, realizedPNL) else: for k in range(len(df)): #''' Long-Short strategy ''' strat_rsi_bb_crossover_3(k, df.Close, buy_symbol, _hard_stop ,avg_trueRange, valid_trading_period, \ rsi_eventArray, rsi_event_id, rsi_action, posSize, posPrc, dynamicPNL, realizedPNL) ''' Generate year summary ''' yearSummary = DataFrame(index = np.arange(len(df)), columns=['TradeID', 'Year', 'Symbol', 'Strat', \ 'StartTrading', 'EndTrading', 'Date', 'DateTime', 'Close', 'IsPeriod', 'Action', \ 'PosSize', 'PosDir', 'PosPrc', 'DynamicPNL', 'RealizedPNL']) yearSummary.TradeID = tradeID yearSummary.Year = year yearSummary.Symbol = symbol yearSummary.Strat = strat #yearSummary.Buy = MRCI.ix[i, 'Buy'] + " " + MRCI.ix[i, 'b_Month'] + str(y_buy) #yearSummary.Sell = MRCI.ix[i, 'Sell'] + " " + MRCI.ix[i, 's_Month'] + str(y_sell) yearSummary.StartTrading = datetime_toString(start_trading_date) yearSummary.EndTrading = datetime_toString(end_trading_date) yearSummary.Date = df.Date yearSummary.DateTime = dt #yearSummary.DateCode = t yearSummary.Close = df.Close yearSummary.IsPeriod = seasonal_trading_period yearSummary.Action = rsi_action yearSummary.PosSize = abs(posSize) yearSummary.PosDir = posSize yearSummary.PosPrc = posPrc
# NOTE(review): this fragment starts inside code whose enclosing statements
# (whatever defines `i`, `jj`, `line`, `tdata` and `colheads`) are not visible
# here, and the original indentation was lost -- confirm the nesting of the
# statements below against the full script.

# Take the text that follows the first '<td>' in line[0] and store it at
# position `salescnt` of the column named by colheads[jj].
dum1 = line[0].split('<td>')[1]
alldata[colheads[jj]][salescnt] = dum1
salescnt += 1
# For the 'Album' column, also keep a URL cut out of the second
# whitespace-separated token of tdata[i+jj+1]; the [6:-1] slice drops the
# first six characters and the last one (presumably an href="..." wrapper --
# TODO confirm against the scraped HTML).
if colheads[jj] == 'Album':
    albumURLs.append(tdata[i+jj+1].split()[1][6:-1])

# Add the collected URLs under the 'Album URL' key and build the DataFrame
# from the assembled mapping.
alldata['Album URL'] = albumURLs
musicinfo = DataFrame(alldata)
# Cast the Year and sales columns to float.
musicinfo.Year = musicinfo.Year.astype(float)
musicinfo['Sales (millions)'] = musicinfo['Sales (millions)'].astype(float)

# plt.hist(musicinfo.Year, bins=range(1988,2014))
# # plt.scatter(musicinfo.Year, musicinfo['Sales (millions)'])
# plt.title("Number of Albums on RIAA\nList of Top Hip Hop Albums")
# plt.ylabel('# Albums')
# plt.xlabel("Year")
# plt.minorticks_on()
# plt.show()

#################################################################
## Now, we have our Album URLs. Let's get the album pages...
#################################################################
# Opened for writing; no matching close() is visible in this view --
# NOTE(review): confirm the handle is closed further down.
fout = open('./albums/albumlist.txt','w')