import os
import tarfile
import zipfile

import pandas as pd


def read_archive(input_archive, archive_type, mode, sample2cat, input_dir):
    """Extract per-sample files from an archive and write a sample table."""
    if not os.path.exists(input_dir):
        os.mkdir(input_dir)
    if archive_type == "zip":
        archive = zipfile.ZipFile(input_archive)
        namelist = archive.namelist()
        extract = archive.open
    elif archive_type == "tar.gz":
        archive = tarfile.open(input_archive, "r:gz")
        namelist = archive.getnames()
        extract = archive.extractfile
    sample_file_names, sample_names = get_sample_names(namelist)
    for tf, sfn in zip(namelist, sample_file_names):
        extracted = extract(tf)
        # archive members yield bytes, so write in binary mode
        with open("%s/%s" % (input_dir, sfn), 'wb') as sample_file_out:
            for line in extracted:
                sample_file_out.write(line)
        extracted.close()
    # create sample table
    if sample2cat is not None:
        sample_cat = pd.read_csv(sample2cat, index_col=0, sep="\t")
        # replace index with cleaned file names
        sample_cat = sample_cat.rename(index=dict(zip(namelist, sample_file_names)))
        sample_table = pd.DataFrame({
            "sample_file_name": sample_file_names,
            "category": sample_cat.loc[sample_file_names].iloc[:, 0].values,
        })
    else:
        sample_table = pd.DataFrame({"sample_file_name": sample_file_names})
    sample_table.index = sample_names
    sample_table.index.name = "sample_name"
    sample_table.to_csv("%s/sample_table.txt" % input_dir, sep="\t")
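# Hedged usage sketch: the archive name, category mapping file, and output
# directory below are hypothetical, and get_sample_names() must be defined
# elsewhere in the module for this call to work.
read_archive("samples.tar.gz", "tar.gz", mode=None,
             sample2cat="sample_categories.tsv", input_dir="input_samples")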
import pandas as pd


def readEdgeList(filename):
    """Read an edge list CSV and return it as a two-column DataFrame."""
    edgelist = pd.read_csv(filename)
    if len(edgelist.columns) < 2:
        raise ValueError("Edge list should have 2 columns")
    elif len(edgelist.columns) > 2:
        # keep only the first two columns (source, target)
        edgelist = pd.read_csv(filename, usecols=[0, 1])
    return pd.DataFrame(edgelist)
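# Hedged usage sketch: "edges.csv" is a hypothetical file whose first two
# columns hold the source and target node of each edge.
edges = readEdgeList("edges.csv")
print(edges.head())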
import pandas as pd

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("")
print("=======================")
print("== Running importcsv ==")
print("=======================")
print("")

fields = ['ID', 'Name', 'Payment Type', 'Amount', 'Pay To', 'Date', 'Details']
print("---- Reading from Payments.csv ----")
paymentsdf = pd.read_csv('Payments.csv', skipinitialspace=True,
                         usecols=fields, encoding="ISO-8859-1")
print(paymentsdf)
import datetime as dt

import pandas as pd
import pandas_datareader.data as web

start = dt.datetime(2017, 1, 1)
end = dt.date.today()

apple_stock = web.DataReader('AAPL', 'yahoo', start, end)

# Download the S&P 500 tickers listed in the first column of SP500.csv
tickers = []
data = pd.read_csv('SP500.csv', header=None)
for item in data[0]:  # column 0 of data
    tickers.append(item)

# collect each ticker's data in a dict instead of injecting
# variables into the namespace via vars()
stocks = {}
for name in tickers:
    stocks[name] = web.DataReader(name, 'yahoo', start, end)
import pandas as pd
from Bio.Blast.Applications import NcbiblastxCommandline

seq_desconhecida = input("unknown sequence: ")
seq_proteinas = input("Trypanosoma cruzi protein: ")

blast_x = "/Users/Juliana/Documents/Juliana/programacao/blastx.exe"
arquivo_blast = r"/Users/Juliana/Documents/Juliana/programacao/Arquivo Blasta.TAC3.txt"

comparacao = NcbiblastxCommandline(cmd=blast_x, query=seq_desconhecida,
                                   subject=seq_proteinas, evalue=0.05,
                                   outfmt=6, out=arquivo_blast)
stdout, stderr = comparacao()

# outfmt 6 is tab-separated with no header row, so supply the standard column names
colunas = ["qseqid", "sseqid", "pident", "length", "mismatch", "gapopen",
           "qstart", "qend", "sstart", "send", "evalue", "Bitscore"]
blast_resultado = pd.read_csv(arquivo_blast, sep="\t", names=colunas)
maximo = blast_resultado.sort_values("Bitscore")
print(maximo.iloc[[-1]])  # hit with the highest bitscore
#################################################################
# IO_ext Ej_08
import json

print("Block read of data with the WITH method from another JavaScript_Object_Notation (JSON) file, read - rw")
# open the JSON file for block reading
with open("JavaScript_Object_Notation_with.json", "r") as read_file:
    json_en_memoria2 = json.load(read_file)
print(json_en_memoria2)
nuevo(13)  # nuevo() is defined elsewhere in this exercise series
#################################################################

#################################################################
# IO_ext Ej_08
#################### CSV (comma-separated text)
print("""
╔═════════════════════════════════════════════════════════════════════════════╗
║                                                                             ║
║                                     CSV                                     ║
║                                                                             ║
╚═════════════════════════════════════════════════════════════════════════════╝
""")
import pandas as pd

#################################################################
# IO_ext Ej_08
print("Block read of data from another CSV file, read - rw")
# open the CSV file (exported from a spreadsheet) for block reading
archivo_de_csv = pd.read_csv("ejemplo_desde_plan_calculo.csv", header=0)
print(archivo_de_csv)
print(archivo_de_csv["Dato_3"])  # assumes the CSV has a "Dato_3" column
nuevo(14, "fin")
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, KFold
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import matplotlib
from matplotlib import pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

data = pd.read_csv("../input/train.csv")
import pandas as pd


def append_stock_cvs(file_name, dataframe):
    """Append rows to the stock CSV and write the result to stock_info.csv."""
    df = pd.read_csv(file_name)
    # DataFrame.append does not modify in place (and is removed in pandas 2.0);
    # concatenate and reassign instead
    df = pd.concat([df, dataframe], ignore_index=True)
    df.to_csv('stock_info.csv', index=False)
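# Hedged usage sketch: new_rows is a hypothetical DataFrame whose columns
# ('Ticker', 'Close') are assumed to match those in stock_info.csv.
new_rows = pd.DataFrame({'Ticker': ['AAPL'], 'Close': [150.0]})
append_stock_cvs('stock_info.csv', new_rows)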
import pandas
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(url, names=names)

print(dataset.shape)
print("\n")
print(dataset.describe())
import os
import re
import time

import pandas as pd

# unique_header, table_headers, and master_index are built earlier in the script
header_dic = {}
for header in unique_header:
    r = re.compile(header + ".+")
    header_match = list(filter(r.match, table_headers))
    header_dic[header] = header_match

data_frame_dic = {}
for header in header_dic:
    data_frame_list = []
    for file in header_dic[header]:
        data_frame_list.append(master_index[file])
    data_frame_dic[header] = data_frame_list

for header in data_frame_dic:
    combine_frame = pd.concat(data_frame_dic[header])
    print(combine_frame)
    # combine_frame = combine_frame.fillna(0)
    combine_frame.to_csv("combined_data/" + str(header) + ".csv", index=False)

combiner()

# delete provider files not modified in the last 7 days
now = time.time()
path = os.path.expanduser("~/Documents/senior_project/providers.txt")
for f in os.listdir(path):
    full_path = os.path.join(path, f)
    if os.stat(full_path).st_mtime < now - 7 * 86400:
        if os.path.isfile(full_path):
            os.remove(full_path)

down_detector = pd.read_csv("downdetector.csv")
internet_traffic_report = pd.read_csv("internettrafficreport.csv")
is_it_down_right_now = pd.read_csv("isitdownrightnow.csv")
is_the_service_down = pd.read_csv("istheservicedown.csv")
outage_report = pd.read_csv("outage.csv")
from keras.wrappers.scikit_learn import KerasClassifier
# from sklearn.model_selection import cross_val_score  # splits into train/test and produces an accuracy score
from keras.models import Sequential  # needed to build the neural network
from keras.layers import Dense, Input, Dropout, Activation  # building blocks for the layers
import keras
from keras.optimizers import SGD
from sklearn.impute import SimpleImputer
import pandas as pd
import numpy as np

veri = pd.read_csv("kanserTespiti.data")
veri.replace('?', -9999, inplace=True)
veriYeni = veri.drop(["1000025"], axis=1)  # drop the ID column

imp = SimpleImputer(missing_values=-9999, strategy="mean")
veriYeni = imp.fit_transform(veriYeni)

giris = veriYeni[:, 0:8]   # inputs (features)
cikis = veriYeni[:, 9]     # output (class label)

model = Sequential()
model.add(Dense(64, input_dim=8))
model.add(Activation("relu"))
model.add(Dense(64))
model.add(Activation("relu"))
# a single sigmoid unit matches the binary_crossentropy loss below
model.add(Dense(1))
model.add(Activation("sigmoid"))

model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])
model.fit(giris, cikis, epochs=5, batch_size=32, validation_split=0.13)
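# Hedged follow-up sketch: report loss and accuracy on the training inputs.
# This snippet keeps no separate test set, so this is only a sanity check,
# not a proper evaluation.
loss, acc = model.evaluate(giris, cikis)
print("loss: %.4f, accuracy: %.4f" % (loss, acc))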
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('Realestate.csv')
df.head()

# train data distribution (the column names contain spaces, so use bracket indexing)
train = df  # no train/test split in this snippet; use the full frame
plt.scatter(train['X1 transaction date'], train['Y house price of unit area'], color='blue')
plt.xlabel("X1 transaction date")
plt.ylabel("Y house price of unit area")
plt.show()

# Modeling
# using the sklearn package to model the data
from sklearn import linear_model
regr = linear_model.LinearRegression()
train_x = np.asanyarray(train[['X1 transaction date']])
train_y = np.asanyarray(train[['Y house price of unit area']])
regr.fit(train_x, train_y)

# Plot output: the scatter plus the fitted regression line
plt.scatter(train['X1 transaction date'], train['Y house price of unit area'], color='blue')
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0], '-r')
plt.xlabel("X1 transaction date")
plt.ylabel("Y house price of unit area")
plt.show()
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

# Load CSV file from the Datasets folder
df = pd.read_csv('../Datasets/CoronaTimeSeries.csv')
df['Date'] = pd.to_datetime(df['Date'])

# Preparing data (the trace plots confirmed cases, so name it accordingly)
data = [go.Scatter(x=df['Date'], y=df['Confirmed'], mode='lines', name='Confirmed')]

# Preparing layout
layout = go.Layout(title='Corona Virus Confirmed Cases From 2020-01-22 to 2020-03-17',
                   xaxis_title="Date", yaxis_title="Number of cases")

# Plot the figure and save it in an html file
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='linechart.html')
# install tensorflow (run in a shell, not inside Python):
#   pip install tensorflow

# import required libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Obtain data from Kaggle on pokemon such as "pikachu" for training our model.
# Read the csv file using pandas (a raw string avoids backslash escapes in the Windows path)
df = pd.read_csv(r'C:\Users\Owe\Desktop\tf\pokemon')

# encode the legendary flag as a boolean 0/1
df['isLegendary'] = df['isLegendary'].astype(int)

# create dummy variables to hold the attack types such as water and grass etc.
# pd.get_dummies creates a new DataFrame from the categories; the dummy
# variables make sure we don't assign integers that imply an ordering the
# game properties don't have
def dummy_creation(df, dummy_categories):
    for i in dummy_categories:
        df_dummy = pd.get_dummies(df[i])
        df = pd.concat([df, df_dummy], axis=1)
        df = df.drop(i, axis=1)
    return df

df = dummy_creation(df, ['Egg_Group_1', 'Body_Style', 'Color', 'Type_1', 'Type_2'])

# Up next: splitting and normalizing the data. We will split our data into
# training and testing data using the pokemon generation (a hedged sketch follows below).
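# Hedged sketch of the split described above, assuming the frame has a
# 'Generation' column (as in the common Kaggle pokemon dataset). The column
# name and the "hold out generation 1" rule are assumptions, not the original
# author's confirmed method.
def train_test_splitter(df, holdout_column):
    # hold out generation 1 as the test set; everything else is training data
    df_train = df.loc[df[holdout_column] != 1]
    df_test = df.loc[df[holdout_column] == 1]
    df_train = df_train.drop(holdout_column, axis=1)
    df_test = df_test.drop(holdout_column, axis=1)
    return df_train, df_test

df_train, df_test = train_test_splitter(df, 'Generation')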
from flask import Flask, render_template
import pandas as pd

app = Flask(__name__)
dataset = pd.read_csv('cardio_train.csv')

@app.route("/", methods=['POST'])
def predictRisks():
    return render_template('insurer_profile.html', predictiontext="hello{}")

if __name__ == "__main__":
    app.run()
import pandas as pd

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# load dataset into Pandas DataFrame
df = pd.read_csv(url, names=['sepal length', 'sepal width',
                             'petal length', 'petal width', 'target'])

# Standardize the Data
from sklearn.preprocessing import StandardScaler

# the target is not a feature, so it is excluded from standardization
features = ['sepal length', 'sepal width', 'petal length', 'petal width']
# Separating out the features
x = df.loc[:, features].values
# Separating out the target
y = df.loc[:, ['target']].values
# Standardizing the features
x = StandardScaler().fit_transform(x)
# See Jupyter notebook
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 10 14:02:57 2018

@author: 김아람
"""
import pandas as pd

train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')

train.head()
train.info()
import pandas as pd

# read csv file
df = pd.read_csv('example.csv')
df.to_csv('My_output', index=False)
pd.read_csv('My_output')

# read excel
pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')
# save excel file
df.to_excel('Excel_Sample2.xlsx', sheet_name='NewSheet')

# read html
data = pd.read_html('http://www.fdic.gov/bank/individual/failed/banklist.html')

# work with SQL
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('my_table', engine)
sqldf = pd.read_sql('my_table', con=engine)
import os

import pandas as pd

# Import data from .csv file
fname = os.path.join("earthpy-downloads", "avg-precip-months-seasons.csv")
avg_monthly_precip = pd.read_csv(fname)
avg_monthly_precip

# %%
filename = 'streamflow_week4.txt'
filepath = os.path.join('../data', filename)
print(os.getcwd())
print(filename)

# the cells below inspect this frame as "data", so assign it that name
data = pd.read_csv(filepath)

# %%
data.columns

# All of the data. Not the most helpful: only shows 10 rows (first 5, last 5),
# but it does include the number of rows and columns.
print(data)

# The first 5 rows
data.head()

# Last five rows
data.tail()

# Super helpful! Tells us what each column is made up of
data.info()

# Normal: tells us the dimensions of the array
data.shape