'Name', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap' ] df = {} df = pd.DataFrame(columns=columns) col_time = ['DBegin', 'DEnd', 'Type', 'VBegin', 'VEnd', 'Volume', 'Mean'] dl = {} dl = pd.DataFrame(columns=col_time) print cryptoconcurrenciesName for cryptoName in cryptoconcurrenciesName: file = mypath + cryptoName cryptoconcurrencies = rp.loadDataCSV(file, target=rp.NONE, null_target_procedure=rp.DELETE_ROW, null_procedure=rp.MEAN, na_values='-') df = df.append(cryptoconcurrencies.dataFrame) df = df.fillna(cryptoName[0:-4]) monedas = ['Bitcoin'] # ,'Ethereum','Monero'] bamboo = Bamboo('Coins', df, columns, target='Market Cap') bambooList = rp.divideDataFrame(bamboo, 'Name') for moneda in monedas: for coso in bambooList: # print coso.dataFrame['Name'].iloc[0] if coso.dataFrame['Name'].iloc[0] == moneda: cdf = coso.dataFrame
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Load the dengue training data, split it by city and drop outlier rows.

Created on Wed Nov 15 16:12:35 2017

@author: edu
"""

import RedPandas as RP

weeks, features, n_nulls = RP.loadDataCSV(name='dengue_train.csv',
                                          target='total_cases',
                                          null_target_procedure=RP.DELETE_ROW,
                                          null_procedure=RP.MEAN)

# divide into iq and sj
# set the index to be this and don't drop
# weeks.set_index(keys=['city'], drop=False, inplace=True)

# get a list of city names
cityNames = weeks['city'].unique().tolist()

# Explicit copies: the frames are modified below, and dropping rows in place
# on a filtered selection of `weeks` triggers pandas' SettingWithCopyWarning.
iqWeeks = weeks.loc[weeks.city == 'iq'].copy()
sjWeeks = weeks.loc[weeks.city == 'sj'].copy()

# 0 Delete outliers
# The numbers are row positions within the frame as it stands when each drop
# runs, so the second iteration indexes the already-reduced iq frame.

# 1st iteration
iqWeeks = iqWeeks.drop(
    iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]])
sjWeeks = sjWeeks.drop(sjWeeks.index[[507, 500, 705, 800]])

# 2nd iteration
iqWeeks = iqWeeks.drop(iqWeeks.index[[23]])
# -*- coding: utf-8 -*-
# 1. Load data
import RedPandas as RP
import numpy

# The second value returned by loadDataCSV is discarded: the column list is
# rebuilt from the frame itself on the next line.
data, _, n_nulls = RP.loadDataCSV(name='dengue_train.csv',
                                  target='total_cases',
                                  null_target_procedure=RP.DELETE_ROW,
                                  null_procedure=RP.MEAN)
name = list(data.columns)

# divide into iq and sj
# Disabled alternative, kept for reference:
#   set the index to be this and don't drop
#   data.set_index(keys=['city'], drop=False, inplace=True)

# get a list of city names
cityNames = data['city'].unique().tolist()

# Explicit copies: rows are dropped from these frames below, and doing that
# in place on a filtered selection of `data` triggers pandas'
# SettingWithCopyWarning.
iqWeeks = data.loc[data.city == 'iq'].copy()
sjWeeks = data.loc[data.city == 'sj'].copy()

# Remove the target and the three columns 'city', 'year' and
# 'week_start_date' from the column list.
for _column in ('total_cases', 'city', 'year', 'week_start_date'):
    name.remove(_column)

# 0 Delete outliers
# 1st iteration (the numbers are row positions within iqWeeks)
iqWeeks = iqWeeks.drop(
    iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]])
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 06 16:00:23 2017

@author: Edu
"""

import RedPandas as rp
import numpy as np
import pandas as pd

# Both San Juan files are loaded with 'total_cases' as the target column.
sj_train = rp.loadDataCSV('db/sj_train.csv', target='total_cases')
sj_test = rp.loadDataCSV('db/sj_test.csv', target='total_cases')

# San Juan analysis
# Take these four columns out of the training feature list, in this order.
for _dropped in ('year', 'total_cases', 'city', 'week_start_date'):
    sj_train.features.remove(_dropped)

# CV tests
# Decision tree
# This is mostly for feature selection
min_range, max_range = 2, 30
depths = []

# Multiple CV tests done since it's random so we get the most frequent depth
for x in range(1):
    pass  # the loop body lies beyond this excerpt
# -*- coding: utf-8 -*-
import RedPandas as rp

# Load the train and test sets; only the training file names a target.
den_train = rp.loadDataCSV('db/dengue_train.csv',
                           target='total_cases',
                           null_target_procedure=rp.DELETE_ROW,
                           null_procedure=rp.MEAN)
den_test = rp.loadDataCSV('db/dengue_test.csv',
                          null_target_procedure=rp.DELETE_ROW,
                          null_procedure=rp.MEAN)

# Dividing the training into 2 dataframes, Iquitos and San Juan
den_train_div = rp.divideDataFrame(den_train, 'city')
for div in den_train_div:
    # The first row's city label identifies which division this is.
    first_city = div.dataFrame['city'].iloc[0]
    if first_city == 'iq':
        iq_train = div
    elif first_city == 'sj':
        sj_train = div
    div.reportBasicInfo(printOnScreen=False)

# Dividing the test into 2 dataframes, Iquitos and San Juan
den_test_div = rp.divideDataFrame(den_test, 'city')
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Load the dengue training data, report on it, split it by city and drop
outlier rows from the iq subset.

Created on Tue Nov 21 12:57:30 2017

@author: edu
"""

# 0 load data
import RedPandas as rp
import pandas as pd
import matplotlib.pyplot as plt

weeks, features, n_nulls = rp.loadDataCSV(name='dengue_train.csv',
                                          target='total_cases',
                                          null_target_procedure=rp.DELETE_ROW,
                                          null_procedure=rp.MEAN)
rp.showInfoDF(weeks, features, n_nulls)

# divide into iq and sj
# get a list of city names
cityNames = weeks['city'].unique().tolist()

# Explicit copies: rows are dropped from these frames below, and doing that
# in place on a filtered selection of `weeks` triggers pandas'
# SettingWithCopyWarning.
iqWeeks = weeks.loc[weeks.city == 'iq'].copy()
sjWeeks = weeks.loc[weeks.city == 'sj'].copy()

# Outlier cleaning done in Task5
# 0 Delete outliers
# 1st iteration (the numbers are row positions within iqWeeks)
iqWeeks = iqWeeks.drop(
    iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]])