def dbReport(self, input_db, inputValues, taxon=9606, output="raw"):
    """Returns report

    :param output: returns a dataframe if set to "dataframe"

    ::

        s.dbReport("Ensembl Gene ID", ['ENSG00000121410', 'ENSG00000171428'])

    """
    inputValues = self._interpret_input_db(inputValues)
    params = self.suds.factory.create('dbReportParams')
    params.input = input_db
    params.taxonId = str(taxon)  # do we need the cast?
    params.inputValues = inputValues
    res = self.serv.dbReport(params)
    if output == "dataframe":
        try:
            import pandas as pd
        except ImportError:
            # fall back to the raw output so we do not reference an unbound name
            print("Pandas library is not installed. Dataframes are not available.")
            return res
        df = pd.read_csv(io.StringIO(res.strip()), sep="\t")
        return df
    else:
        return res
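A minimal usage sketch, assuming this method lives on a bioservices-style BioDBNet client; the class name and constructor below are assumptions, not part of the snippet above.

# Hypothetical usage sketch; BioDBNet is the assumed client class.
from bioservices import BioDBNet  # assumed import

s = BioDBNet()
df = s.dbReport("Ensembl Gene ID",
                ['ENSG00000121410', 'ENSG00000171428'],
                output="dataframe")
print(df.head())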
def get_top_votos(cargo, indice):
    import pandas as pd
    dados = pd.read_csv('dados/dados.csv')
    # keep only the rows for the requested office, then take the top `indice` by vote count
    dados = dados.loc[dados['CARGO'] == cargo]
    dados = dados.sort_values('QTD_VOTOS', ascending=False).head(indice)
    return dados
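A hedged usage sketch; 'PRESIDENTE' is an invented CARGO value, since the actual contents of dados/dados.csv are not shown.

# Hypothetical call; the CARGO value is assumed, not taken from the original data.
top10 = get_top_votos('PRESIDENTE', 10)
print(top10[['CARGO', 'QTD_VOTOS']])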
import sqlite3
import pandas


def import_db():
    conn = sqlite3.connect("studentdb.db")
    safety = input("WARNING: before you do this, ensure you have made a copy. Do you wish to proceed? y/n: ")
    if safety not in ("y", "Y"):
        print("Response received, aborting operation")
        return
    file_name = input("Please specify a file path with a valid extension: ")
    db_vals_csv = pandas.read_csv(file_name)
    db_vals_csv.to_sql("Students", conn, if_exists="append", index=False)
    conn.commit()
    conn.close()
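A small follow-up sketch to verify the import round-trips, reusing the same database and table names as import_db() above.

# Verification sketch; reads the row count back out of the Students table.
import sqlite3
import pandas

conn = sqlite3.connect("studentdb.db")
print(pandas.read_sql("SELECT COUNT(*) AS n FROM Students", conn))
conn.close()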
def paradigm_info(design_text_file):
    """Create the inputs for first- (or second-) level analyses based on a csv design file.
    The expected column titles for the csv are 'name' for the specific task (e.g., 'fp_run1'),
    'conditions' (e.g., 'highCal'), 'onsets', and 'durations'."""
    from nipype.interfaces.base import Bunch
    import pandas as pd
    try:
        df_all = pd.read_csv(design_text_file)
        conditions = []
        onsets = []
        durations = []
        for task in sorted(set(df_all['name'])):
            df = df_all.loc[df_all['name'] == task, :]  # limiting to the applicable task
            conditions.append(df['conditions'])
            onsets.append(df['onsets'])
            durations.append(df['durations'])
        par_info = Bunch(conditions=conditions, onsets=onsets, durations=durations)
    except Exception:
        # TODO test to see what types of errors may be common (lists in the durations, no column name, etc.)
        print('Error in paradigm info.')
        raise
    return par_info
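A hedged sketch of a design file that matches the docstring's expected columns; the path and every value below are invented for illustration.

# Hypothetical design.csv contents (rows invented to match the documented columns):
# name,conditions,onsets,durations
# fp_run1,highCal,10.0,2.5
# fp_run1,lowCal,30.0,2.5
par_info = paradigm_info('design.csv')  # assumed path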
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd

data = pd.read_csv("")  # path left unspecified in the original
# print(data.columns)
traindf = pd.read_csv('')  # path left unspecified in the original
y = traindf['Overall'] >= 87
x = traindf.copy()
del x['Overall']
feature_name = list(x.columns)
num_feats = 30
print(feature_name)


def cor_selector(x, y, num_feats):
    # rank features by absolute Pearson correlation with the target
    cor_list = []
    feature_name = x.columns.tolist()
    for i in x.columns.tolist():
        cor = np.corrcoef(x[i], y)[0, 1]
        cor_list.append(cor)
    cor_list = [0 if np.isnan(i) else i for i in cor_list]
    cor_feature = x.iloc[:, np.argsort(np.abs(cor_list))[-num_feats:]].columns.tolist()
    cor_support = [True if i in cor_feature else False for i in feature_name]
    return cor_support, cor_feature
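A short usage sketch under the assumption that x, y, and num_feats are the objects built above (the snippet itself stops before calling the selector).

# Usage sketch; assumes x, y, num_feats from the code above.
cor_support, cor_feature = cor_selector(x, y, num_feats)
print(len(cor_feature), 'selected features')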
# importing important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.model_selection as sl
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# loading dataset and storing it in a variable named dataset
dataset = pd.read_csv('Climate.csv')

# Creating primary arrays of temperature and rainfall
Temperature = dataset[['MeanTemperatureMinimum']]
Rainfall = dataset[['MeanRainfallMM']]

# Splitting the dataset
Temptrain, Temptest, Raintrain, Raintest = sl.train_test_split(
    Temperature, Rainfall, test_size=0.2, shuffle=True)

# Creating regression object (the old normalize=False flag was the default
# and has since been removed from scikit-learn, so it is dropped here)
regr = linear_model.LinearRegression(fit_intercept=True)

# Training the model
regr.fit(Temptrain, Raintrain)

# Using the model to predict values from the test dataset
Rainpred = regr.predict(Temptest)

# Printing mean square error and r2 score
print('MSE:', mean_squared_error(Raintest, Rainpred))
print('R2 :', r2_score(Raintest, Rainpred))
import pandas as pd
from pandas.tseries.offsets import DateOffset

flyers = pd.read_csv('/Users/ramintaghizada/TwitterMining/Flask/myproject/test.csv')
flyers['createdat'] = pd.to_datetime(flyers['createdat'])
flyers.set_index('createdat', drop=False, inplace=True)
flyers.index = flyers.index.tz_localize('GMT').tz_convert('EST')
flyers.index = flyers.index - DateOffset(hours=12)
flyers.index
# importing important libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn.model_selection as sl
from sklearn.linear_model import LogisticRegression
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

employment = {'Salaried': 0, 'Self employed': 1, ' ': 1}

# importing dataset
dataset = pd.read_csv('CreditCardData.csv')
dataset = dataset.dropna()

# creating dummy variable for column EmploymentType
dataset.EmploymentType = [employment[number] for number in dataset.EmploymentType]

loanDefaulter = dataset[['loandefault']]
factors = dataset[['disbursedamount', 'assetcost', 'EmploymentType',
                   'PRI.CURRENT.BALANCE', 'PRI.SANCTIONED.AMOUNT',
                   'PRIMARY.INSTAL.AMT']]

# creating training and testing sets and scaling the data
factorsTrain, factorsTest, loanTrain, loanTest = sl.train_test_split(
    factors, loanDefaulter, test_size=0.2, shuffle=True)
scaler = StandardScaler()
factorsTrain = scaler.fit_transform(factorsTrain)
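The snippet stops after scaling the training split. A hedged continuation consistent with the imports above (LogisticRegression, metrics), but not part of the original:

# Hedged continuation sketch; fitting and scoring were not shown in the original.
factorsTest = scaler.transform(factorsTest)  # scale the test split with the fitted scaler
model = LogisticRegression()
model.fit(factorsTrain, loanTrain.values.ravel())
loanPred = model.predict(factorsTest)
print(metrics.accuracy_score(loanTest, loanPred))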
from sklearn.preprocessing import OneHotEncoder

# note: categorical_features was removed from OneHotEncoder in scikit-learn 0.22;
# this call only works on older versions (see the ColumnTransformer sketch below)
one = OneHotEncoder(categorical_features=[0, 4, 5, 6])
X = one.fit_transform(X)
X = X.toarray()

# Scaling the feature matrix
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

from sklearn.tree import DecisionTreeClassifier
dtf = DecisionTreeClassifier(max_depth=25)  # increase precision
dtf.fit(X, y)
dtf.score(X, y)

test = pd.read_csv("test.csv")
X_test = test.iloc[:, [1, 2, 5, 6, 7, 8, 10]].values  # feature matrix
# note: X_test should pass through the same encoder and scaler as X before predicting
y_pred = dtf.predict(X_test)

from sklearn.metrics import precision_score, recall_score, f1_score
# these scores need true labels y_test for the test set, which test.csv does not provide here
precision_score(y_test, y_pred)
recall_score(y_test, y_pred)
f1_score(y_test, y_pred)
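Because categorical_features is gone from modern scikit-learn, here is a hedged equivalent using ColumnTransformer, with the column indices copied from the call above and X assumed to be the same feature matrix:

# Hedged modern equivalent of OneHotEncoder(categorical_features=[0, 4, 5, 6])
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(
    [('onehot', OneHotEncoder(), [0, 4, 5, 6])],  # encode these columns, pass the rest through
    remainder='passthrough')
X_encoded = ct.fit_transform(X)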
def Recognize(fname):
    return pd.read_csv(fname)
in the shape of ["State","RegionName"].'''
    states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada',
              'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland',
              'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana',
              'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia',
              'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine',
              'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan',
              'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam',
              'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina',
              'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands',
              'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia',
              'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York',
              'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California',
              'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico',
              'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands',
              'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia',
              'ND': 'North Dakota', 'VA': 'Virginia'}
    quarterly_data = (pd.read_csv('City_Zhvi_AllHomes.csv')
                        .drop(['RegionID', 'Metro', 'CountyName', 'SizeRank'], axis=1)
                        .sort_values(['State', 'RegionName'], ascending=[True, True]))
    quarterly_data['State'] = quarterly_data['State'].map(states)
    quarterly_data.set_index(['State', 'RegionName'], inplace=True)
    quarterly_data.drop(quarterly_data.columns[0:45], axis=1, inplace=True)
    quarterly_data = quarterly_data.groupby(
        pd.PeriodIndex(quarterly_data.columns, freq='Q'), axis=1).mean()
    print(quarterly_data)


convert_quarterly_data()


def run_ttest():
    '''First creates new data showing the decline or growth of housing prices
    between the recession start and the recession bottom. Then runs a ttest
    comparing the university town values to the non-university town values,
    returning whether the alternative hypothesis (that the two groups are
    different) is true or not, as well as the p-value of the test.
    '''
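The run_ttest body is missing from the snippet. A hedged, self-contained sketch of what the docstring describes, using scipy's ttest_ind; the two price-ratio series below are invented stand-ins, not data from the assignment:

# Hedged sketch of a run_ttest body; all input values here are invented for illustration.
import pandas as pd
from scipy import stats

uni_prices = pd.Series([0.95, 0.97, 1.02, 0.99])      # hypothetical university-town ratios
non_uni_prices = pd.Series([0.90, 0.88, 0.93, 0.91])  # hypothetical non-university ratios

t_stat, p = stats.ttest_ind(uni_prices, non_uni_prices, nan_policy='omit')
different = p < 0.01  # reject the null hypothesis at the 1% level
better = ('university town' if uni_prices.mean() > non_uni_prices.mean()
          else 'non-university town')
print(different, p, better)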
    data.append(cur_feature)
    data = pd.concat(data, axis=0)
    return data


## test
if __name__ == '__main__':
    gt_path = './data/gt.csv'
    doc_folder = './data/docs/'
    table_gt = pd.read_csv(gt_path)
    pdfnames = table_gt['filename'].unique()
    # table_gt is the ground truth table
    # cols: filename, region_id, table_id, page_n, x1, y1, x2, y2, cor_x1, cor_y1, cor_x2, cor_y2
    data = {}
    for i in range(len(pdfnames)):
        pdfname = pdfnames[i]
        data[pdfname] = get_table_feature(table_gt, doc_folder, pdfname)
        # data[pdfname] is a dataframe with cols:
        # [row, top, bottom, to_pre, to_next, row_ele, row_min, row_max,
        #  match_pre, match_next, top_to_line, bottom_to_line, spaceratio, label]


'''
## to build models on these features, just do:
X = data[pdfname].iloc[:, 1:-1]
def load_csv():
    return pandas.read_csv(CSV_FILE_LOCATION)
import numpy as np
import scipy
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt

# Set some constant values that will never change
avg_work_hrs_yr = 40 * 52
# TODO: write a function for hours to account for non-standard work weeks

# set initial hours; needs to be based on time
hours = pd.read_csv('work_hours.csv')
hr_per_week = pd.read_csv('work_week.csv')

# set hourly rate
rates = pd.read_csv('hourly_rate.csv')
deduction = pd.read_csv('deductions.csv')

# deductions block from other budget file
# DEDUCTION CALCULATION LEADING TO USABLE INCOME
'''
def deduction_calc():
    if 0.00 < total_gross_income <= 9525.99:
        ded_taxes = (total_gross_income - deductions()) * 0.10
        ded_taxes = round(ded_taxes, 2)
        return ded_taxes
import pandas as pd

# raw string keeps '\n' in the Windows path from being read as a newline escape
article = pd.read_csv(r'G:\news_article.csv')
import pandas as pd

dataframe = pd.read_csv("01110.csv")
dataframe.head()
def holi(to_mins, route):
    # reg and train_test_split are used as globals; the original script defines them elsewhere.
    # Each 60-minute window from 480 (08:00) to 1259 (20:59) has its own file,
    # sloth1.csv .. sloth13.csv, so the long if/elif chain collapses to arithmetic.
    if not (480 <= to_mins < 1260):
        raise ValueError("to_mins must fall between 480 and 1259 minutes")
    bucket = (to_mins - 480) // 60 + 1
    traffic = pd.read_csv('sloth%d.csv' % bucket)
    X = traffic['to_minutes'].values.reshape(-1, 1)

    def predict_time(column):
        # fit on one vehicle column and predict the travel time at to_mins
        Y = traffic[column].values.reshape(-1, 1)
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.3, random_state=42)
        reg.fit(X_train, y_train)
        return reg.predict([[to_mins]])

    if route in (1, 2):
        # every route branch reads the same car1/bus1 columns
        ans1 = predict_time('car1')  # predicted time by car
        ans2 = predict_time('bus1')  # predicted time by bus
        return ans1, ans2
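A hedged usage sketch; the module-level reg and the train_test_split import are assumptions the original script implies but never shows, and LinearRegression is an assumed choice of regressor:

# Hypothetical setup and call.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

reg = LinearRegression()
car_time, bus_time = holi(550, 1)  # 550 minutes (09:10) falls in the sloth2 bucket
print("%d minutes by car, %d by bus" % (car_time[0, 0], bus_time[0, 0]))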
def populateFileArray(nameFile):
    df = pandas.read_csv(nameFile)
    return df
import pandas

df = pandas.read_csv('excsv.csv')
df.to_csv('excsv1.csv')
import pandas as pd

AllData = {}
for patient in range(0, 10):
    # one file per patient, keyed by the loop variable
    path = "Data/Patient" + str(patient) + "/2019-01-15_TimeSeries_Muse-EEG_0.csv"
    data1 = pd.read_csv(path)
    print(path)
    AllData[patient] = data1
# plots
plt.figure()
sb.lineplot(Average.index, Average.Houses)

plt.figure()
sb.lineplot(Aheating.index.hour, Aheating)
sb.lineplot(Aheating[Aheating.index.month == 8].index.hour,
            Aheating[Aheating.index.month == 8])

plt.figure()
sb.lineplot(Hheating.index.hour, Hheating)
# a timestamp's month can only equal one value, so chained 'and' comparisons
# can never all hold; isin selects the summer months instead
summer = Hheating[Hheating.index.month.isin([6, 7, 8])]
sb.lineplot(summer.index.hour, summer)

# save dataframe to json
k = df.iloc[:50, :5].to_json(orient='index')
import json
with open('dataframe.json', 'w') as f:
    json.dump(k, f)

# read json
f = open('dataframe.json')
data = json.load(f)
f.close()
data = pd.read_json('dataframe.json', orient='index')
pd.read_csv('Fle')
import pandas as pd

# imports data file
mpg = pd.read_csv("jp-us-mpg.dat", delim_whitespace=True)
mpg.head()
mpg.tail()

from numpy import mean
# Drops missing values and finds the mean
mean(mpg["Japan"].dropna())
mean(mpg["US"].dropna())

from numpy import var
us = mpg["US"].dropna()
jp = mpg["Japan"].dropna()
# numpy's var() is the population variance; rescale by n/(n-1) to get the
# sample variance, sum((x_i - mean)^2) / (n - 1)
jp_var = var(jp) * (len(jp) / float(len(jp) - 1))
us_var = var(us) * (len(us) / float(len(us) - 1))

# Welch's t statistic: t = (mean(jp) - mean(us)) / sqrt(jp_var/len(jp) + us_var/len(us))
# e.g. with the worked numbers from the original: (20.1 - 30.5) / sqrt(41.1/149 + 37.3/79)
# Two-sided p-value
from scipy.stats import t
2 * (1.0 - t.cdf(abs(12.946), 136.8750))
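A cross-check sketch: scipy runs the same Welch test directly when equal_var=False, using the jp and us series built above.

# Welch's t-test via scipy, for comparison with the hand computation above.
from scipy import stats

t_stat, p_value = stats.ttest_ind(jp, us, equal_var=False)
print(t_stat, p_value)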
import pandas

df = pandas.read_csv('infor.csv')
df.to_csv('infor1.csv')
    plt.show()
    return


def writePCADataToCSV(csvData):
    with open('PCAData.csv', 'w') as writeFile:
        writeFile.write("X-Values" + "," + "Y-Values")
        writeFile.write("\n")
        for entries in csvData:
            writeFile.write(str(entries[0]) + "," + str(entries[1]))
            writeFile.write("\n")
    # the with-block closes the file; no explicit close needed


if __name__ == '__main__':
    originalMatrix = pd.read_csv()  # the original call omits the path

    # to calculate the eigenvalues and eigenvectors
    covar_matrix = PCA(n_components=2)

    # We perform data preprocessing using StandardScaler()
    originalMatrix = StandardScaler().fit_transform(originalMatrix)
    principalComponents = covar_matrix.fit_transform(originalMatrix)
    principalDf = pd.DataFrame(
        data=principalComponents,
        columns=['principal component 1', 'principal component 2'])
    x = principalComponents[:, 0]
    y = principalComponents[:, 1]

    # performing the visualization
    scatterPlotVisualizer(x, y)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

countries = pd.read_csv("countries.csv")
currencies = pd.read_csv("currencies.csv")
def print_lat_lan(filename, delay):
    geocordinates = pd.read_csv(filename)  # use the filename argument, not a literal string
    for _, row in geocordinates.iterrows():
        time.sleep(delay)
        # print the first two columns of each row as an Eid -> (lat, lon) pair
        print("%s" % {"Eid": (row.iloc[0], row.iloc[1])})