Esempio n. 1
0
@author: Madhu
"""
# Import libraries necessary for this project
import sklearn
import numpy as np
from matplotlib import pyplot as plt

# Project-local helpers for loading and exploring the dataset
from projectFunctions import loadData, exploreData

# print() with a single argument behaves identically on Python 2 and 3, so the
# script no longer depends on the Python 2-only print statement.
print("-----------------------------------------------------------------------")
print('The scikit-learn version is {}.'.format(sklearn.__version__))

# Directory and file name of the dataset
path = r'C:\Users\pmspr\Documents\HS\MS\Sem 2\EECS 738\Lab\2\Work\Code\Data'
filename = "forestfires.csv"

# Load the data
data = loadData(path, filename)

# Explore the data
exploreData(data)

# Success - display the first record and the summary statistics.
# NOTE(review): display() is not imported in this script; presumably it is run
# inside an IPython/Jupyter session where display is a builtin - confirm.
if data is not None:
    display(data.head(n=1))
    print(data.describe(include='all'))

# Separate the feature columns from the 'area' target column.
# NOTE(review): data is dereferenced unconditionally here, so the None check
# above does not protect these lines.
drop_col = ['X', 'Y', 'rain', 'area']
features_raw = data.drop(drop_col, axis=1)
target_raw = data['area']
if features_raw is not None:
    display(features_raw.head(n=1))
Esempio n. 2
0
# -*- coding: utf-8 -*-
"""
Created on Sun May  3 13:54:50 2020

@author: pmspr
"""
# Step 0: Import the project helpers used by each stage below
from projectFunctions import cnn11D, loadData, padInput

# Step 1: Load the imdb database as train/test inputs and labels
x_train, y_train, x_test, y_test = loadData()

# Step 2: Pad the train and test inputs
x_train, x_test = padInput(x_train, x_test)

# Step 3: Create a 1D CNN for the baseline
cnn11D(x_train, x_test, y_train, y_test)
Esempio n. 3
0
#import networkx as nx
import numpy as np
import pandas as pd
from pandas import compat

# Set the PY3 flag on pandas' compat module before the project helpers load.
compat.PY3 = True
print("-----------------------------------------------------------------------")
print('The scikit-learn version is {}.'.format(sklearn.__version__))

# Project-local helper functions
from projectFunctions import loadData, sentimentPolarity, exploreData, missingValues

# Both CSV files are read from the same data directory.
path = r'C:\Users\pmspr\Documents\HS\MS\Sem 3\EECS 731\Week 5\HW\Git\EECS-731-Project-3\Data'

filename = "links.csv"
data_l = loadData(path, filename)

filename = "movies.csv"
data_m = loadData(path, filename)

# Genre labels used by this script.
genres = [
    'Action',
    'Adventure',
    'Animation',
    'Childrens',
    'Comedy',
    'Crime',
    'Documentary',
    'Drama',
    'Fantasy',
    'Film-Noir',
    'Horror',
    'Musical',
    'Mystery',
    'Romance',
    'Sci-Fi',
    'Thriller',
    'War',
    'Western',
]

# Empty frame with movieId/title/genre columns. Presumably one row per
# movie/genre pair is added inside the loop - the code that fills it is not
# visible in this excerpt (TODO confirm).
d1 = pd.DataFrame(columns=['movieId', 'title', 'genre'])
for ind, row in data_m.iterrows():
    # The 'genres' value is split on '|' into a list of genre names.
    gstr = row['genres']
    glst = gstr.split("|")
    # Reset for every movie row; how cnt is used lies beyond this excerpt.
    cnt = 0
Esempio n. 4
0
#for fn in uploaded.keys():
#  print('User uploaded file "{name}" with length {length} bytes'.format(
#      name=fn, length=len(uploaded[fn])))

# In[2]:

#!ls

# ### **Load data**
#
# The training and test files are read with *loadData()*. According to the original notebook note, it loads the data in the specified input format and sorts the rows by user and date.

# In[2]:

# Read the training and test sets
df_train = pF.loadData("train.txt")
df_train.head()  # notebook cell output; the result is discarded in a script

df_test = pF.loadData("test.txt")

# In[3]:

df_train.shape

# In[4]:

# Count the distinct customers and products in the training set
n_users = len(df_train['ID_Customer'].unique())
n_items = len(df_train['Cod_Prod'].unique())
print('{} users'.format(n_users))
print('{} products'.format(n_items))
Esempio n. 5
0
import seaborn as sns
from matplotlib import pyplot as plt
from pandas import compat

# Apply seaborn's default plot styling.
sns.set()

# Set the PY3 flag on pandas' compat module before the project helpers load.
compat.PY3 = True
print("-----------------------------------------------------------------------")
print('The scikit-learn version is {}.'.format(sklearn.__version__))

# Project-local helper functions
from projectFunctions import loadData, exploreData, missingValues, transformData

path = r'C:\Users\pmspr\Documents\HS\MS\Sem 3\EECS 731\Week 6\HW\Git\EECS-731-Project-4\Data'
filename = "nfl_games.csv"
data_raw = loadData(path, filename)

# Work on a copy of the raw games without these five columns.
data = data_raw.drop(
    ['neutral', 'playoff', 'date', 'season', 'result1'], axis=1)
#data.rename(columns={'elo_prob1':'target'},inplace=True)
teams = data['team1'].unique()

# Per (team1, team2) pair, count the non-null 'score1' entries, expose that
# count as 'Count', and join it back onto every game row.
data_ct = pd.DataFrame(
    data.groupby(['team1', 'team2'], as_index=False).count(),
    columns=['team1', 'team2', 'score1'],
).rename(columns={'score1': 'Count'})
data = data.merge(data_ct, on=['team1', 'team2'], how='inner')

# Report the missing values (first 20 rows of the summary table)
misVal, mis_val_table_ren_columns = missingValues(data)
print(mis_val_table_ren_columns.head(20))

t1 = pd.DataFrame(data, columns=['team1', 'score1'])