Ejemplo n.º 1
0
    'Name', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'
]
# Empty frame with the price columns; the per-file data loaded in the loop
# below is accumulated into it.
df = pd.DataFrame(columns=columns)

# Empty frame with the interval columns listed in col_time. Nothing in the
# visible part of the script fills it.
col_time = ['DBegin', 'DEnd', 'Type', 'VBegin', 'VEnd', 'Volume', 'Mean']
dl = pd.DataFrame(columns=col_time)

# Single-argument print() behaves the same under Python 2 and Python 3.
print(cryptoconcurrenciesName)
for cryptoName in cryptoconcurrenciesName:
    file = mypath + cryptoName

    # '-' marks a missing value in the CSV files; other nulls are handled by
    # the procedures passed here (semantics defined in RedPandas).
    cryptoconcurrencies = rp.loadDataCSV(file,
                                         target=rp.NONE,
                                         null_target_procedure=rp.DELETE_ROW,
                                         null_procedure=rp.MEAN,
                                         na_values='-')
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat([df, cryptoconcurrencies.dataFrame])
    # Fill every remaining NaN with the file name minus its last four
    # characters (presumably the '.csv' extension).
    # NOTE(review): this assumes only the 'Name' column is still empty for
    # the freshly added rows - confirm.
    df = df.fillna(cryptoName[0:-4])

# Coins to process. Only Bitcoin is active; 'Ethereum' and 'Monero' are
# commented out of the list.
monedas = ['Bitcoin']  # ,'Ethereum','Monero']

# Wrap the accumulated frame in a Bamboo object with 'Market Cap' as target,
# then split it on the 'Name' column.
# NOTE(review): Bamboo and rp.divideDataFrame are defined outside this
# snippet; presumably the split yields one object per distinct name - confirm.
bamboo = Bamboo('Coins', df, columns, target='Market Cap')
bambooList = rp.divideDataFrame(bamboo, 'Name')
for moneda in monedas:

    for coso in bambooList:
        # Each piece is identified by the 'Name' value of its first row.
        if coso.dataFrame['Name'].iloc[0] == moneda:
            # Keep the frame of the piece that matches the requested coin.
            cdf = coso.dataFrame
Ejemplo n.º 2
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 15 16:12:35 2017

@author: edu
"""

import RedPandas as RP

# Load the training set. The call is unpacked into three values; judging by
# the names they are the data frame, its feature list and a null count
# (confirm against RedPandas).
weeks, features, n_nulls = RP.loadDataCSV(name='dengue_train.csv',
                                          target='total_cases',
                                          null_target_procedure=RP.DELETE_ROW,
                                          null_procedure=RP.MEAN)

# Divide into iq and sj.
# get a list of city names
cityNames = weeks['city'].unique().tolist()
# Take explicit copies: the rows are dropped in place below, and doing that
# on a frame obtained by filtering `weeks` raises SettingWithCopyWarning.
iqWeeks = weeks.loc[weeks.city == 'iq'].copy()
sjWeeks = weeks.loc[weeks.city == 'sj'].copy()

# 0 Delete outliers
# The integer lists are positions within each city frame; they are turned
# into index labels through `.index[...]` before dropping.
# 1st iteration
iqWeeks.drop(iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]],
             inplace=True)
sjWeeks.drop(sjWeeks.index[[507, 500, 705, 800]], inplace=True)
# 2nd iteration: position 23 refers to the frame as it is after the first
# iteration has already removed rows.
iqWeeks.drop(iqWeeks.index[[23]], inplace=True)
# -*- coding: utf-8 -*-

# 1. Load data
import RedPandas as RP

data, name, n_nulls = RP.loadDataCSV(name='dengue_train.csv',
                                     target='total_cases',
                                     null_target_procedure=RP.DELETE_ROW,
                                     null_procedure=RP.MEAN)
# Replace the second value returned above with the column names of `data`
# (iterating an empty head() yields the column labels).
name = list(data.head(0))
import numpy

# Divide into iq and sj.
# Disabled alternative: index the frame by city without dropping the column.
# data.set_index(keys=['city'], drop=False,inplace=True)
# get a list of city names
cityNames = data['city'].unique().tolist()
# Take explicit copies: rows are dropped in place below, and doing that on a
# frame obtained by filtering `data` raises SettingWithCopyWarning.
iqWeeks = data.loc[data.city == 'iq'].copy()
sjWeeks = data.loc[data.city == 'sj'].copy()

# Remove these four columns from the name list.
name.remove('total_cases')
name.remove('city')
name.remove('year')
name.remove('week_start_date')

# 0 Delete outliers
# The integers are positions within iqWeeks, converted to index labels.
# 1st iteration
iqWeeks.drop(iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]],
             inplace=True)
Ejemplo n.º 4
0
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 06 16:00:23 2017

@author: Edu
"""

import RedPandas as rp
import numpy as np
import pandas as pd

# Load the San Juan train and test sets, both with 'total_cases' as target.
sj_train = rp.loadDataCSV('db/sj_train.csv', target='total_cases')
sj_test = rp.loadDataCSV('db/sj_test.csv', target='total_cases')

# San Juan analysis: take these four entries out of the training feature
# list, in the same order as before.
for dropped_feature in ('year', 'total_cases', 'city', 'week_start_date'):
    sj_train.features.remove(dropped_feature)

# CV tests - decision tree, mostly used for feature selection.
# Range bounds and the list that collects the depths found.
min_range, max_range = 2, 30
depths = []

#Multiple CV tests done since it's random so we get the most frequent depth

for x in range(0, 1):
Ejemplo n.º 5
0
# -*- coding: utf-8 -*-

import RedPandas as rp

# Null-handling options shared by the train and test loads.
null_handling = dict(null_target_procedure=rp.DELETE_ROW,
                     null_procedure=rp.MEAN)

den_train = rp.loadDataCSV('db/dengue_train.csv', target='total_cases',
                           **null_handling)
den_test = rp.loadDataCSV('db/dengue_test.csv', **null_handling)

# Split the training data by 'city' and keep the Iquitos ('iq') and
# San Juan ('sj') pieces under their own names.
den_train_div = rp.divideDataFrame(den_train, 'city')

for div in den_train_div:
    # The city code of a piece is read from its first row.
    city_code = div.dataFrame['city'].iloc[0]

    if city_code == 'iq':
        iq_train = div
    elif city_code == 'sj':
        sj_train = div

    div.reportBasicInfo(printOnScreen=False)

# Split the test data by 'city' in the same way.
den_test_div = rp.divideDataFrame(den_test, 'city')
Ejemplo n.º 6
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 21 12:57:30 2017

@author: edu
"""

# 0 load data
import RedPandas as rp
import pandas as pd
import matplotlib.pyplot as plt

# Load the training set. The call is unpacked into three values that are
# passed straight to showInfoDF below.
weeks, features, n_nulls = rp.loadDataCSV(name='dengue_train.csv',
                                          target='total_cases',
                                          null_target_procedure=rp.DELETE_ROW,
                                          null_procedure=rp.MEAN)

rp.showInfoDF(weeks, features, n_nulls)

# Divide into iq and sj.
# get a list of city names
cityNames = weeks['city'].unique().tolist()
# Take explicit copies: rows are dropped in place below, and doing that on a
# frame obtained by filtering `weeks` raises SettingWithCopyWarning.
iqWeeks = weeks.loc[weeks.city == 'iq'].copy()
sjWeeks = weeks.loc[weeks.city == 'sj'].copy()

# Outlier cleaning done in Task5
# 0 Delete outliers
# The integers are positions within iqWeeks, converted to index labels.
# 1st iteration
iqWeeks.drop(iqWeeks.index[[244, 104, 3, 103, 51, 306, 10, 115, 273]],
             inplace=True)