import numpy as np
import pandas as pd


def read_css():
    """I may never have written a more painful function in my life.
    If you want data, and are thinking of using numpy or pandas -- read it in by hand.
    """
    n = 21
    chunk = 2 * n + 3  # Both matrices, and 3 extra rows
    advice = np.zeros((n, n, n))
    friendship = np.zeros((n, n, n))

    pdf = pd.read_csv("/Users/alexloewi/Documents/Data/cognitive social structures/rearranged_cogsocstr.txt", sep="\t")
    matrix_columns = pdf[pdf.columns[0:21]]

    for perceiver in range(n):
        # This gets all the data for one person
        x = chunk * perceiver
        # .ix is deprecated; .iloc with half-open slices selects the same rows
        a = np.array(matrix_columns.iloc[x:x + 21])
        np.fill_diagonal(a, 0)
        f = np.array(matrix_columns.iloc[x + 21:x + 42])
        np.fill_diagonal(f, 0)
        advice[perceiver, :, :] = a
        friendship[perceiver, :, :] = f

    # Consensus matrices (AND rule)
    ca = np.zeros((n, n))
    cf = np.zeros((n, n))
    for i, j in np.ndindex(ca.shape):
        if advice[i, i, j] + advice[j, i, j] == 2:
            ca[i, j] = 1
    for i, j in np.ndindex(cf.shape):
        if friendship[i, i, j] + friendship[j, i, j] == 2:
            cf[i, j] = 1

    # Self-proclaimed relationships (OR rule)
    sa = np.zeros((n, n))
    sf = np.zeros((n, n))
    for i, j in np.ndindex(sa.shape):
        if advice[i, i, j] + advice[j, i, j] >= 1:
            sa[i, j] = 1
    for i, j in np.ndindex(sf.shape):
        if friendship[i, i, j] + friendship[j, i, j] >= 1:
            sf[i, j] = 1

    return advice, friendship, ca, cf, sa, sf
def _readData(self, filePath):
    df = pd.read_csv(filePath)

    # Binary task uses the "sentiment" column; multi-class uses "rate"
    if self.config.numClasses == 1:
        labels = df["sentiment"].tolist()
    elif self.config.numClasses > 1:
        labels = df["rate"].tolist()

    review = df["review"].tolist()
    reviews = [line.strip().split() for line in review]

    return reviews, labels
def sensores():
    lista = []
    df = pd.read_csv("../mqtt/test/log.csv")
    tamanho = len(df)
    # Take the last five rows of the log
    dado = np.array(df[tamanho - 5:tamanho]).tolist()
    for i in range(len(dado)):
        n_dado = "IoT ID: %s\nMES: %s\nDIA: %s\nHora: %s\nUmidade: %s\n*************" % (
            dado[i][0], dado[i][1], dado[i][2], dado[i][3], dado[i][4])
        lista.append(n_dado)
    return lista
def load_data(cat_filename):
    """Load the catalog data and return it as a DataFrame."""
    # Whitespace-delimited catalog with '#' comment lines and no header row
    cat_data = pd.read_csv(cat_filename, delim_whitespace=True, comment='#', header=None)
    # Assign column names
    cat_data.columns = ['id', 'redshift', 'tu', 'tg', 'tr', 'ti', 'tz', 'ty',
                        'u10', 'uerr10', 'g10', 'gerr10', 'r10', 'rerr10',
                        'i10', 'ierr10', 'z10', 'zerr10', 'y10', 'yerr10']
    return cat_data
def read(self):
    try:
        if len(self.csv_file) == 1:
            # Single file: the csv module is enough
            self._rows = csv.DictReader(open(self.csv_file[0]))
        else:
            # Several files: concatenate them with pandas
            df_merged = pd.concat(
                [pd.read_csv(f, sep=',') for f in self.csv_file],
                ignore_index=True, sort=False)
            self._rows = df_merged
    except Exception as err:
        print('[CRITICAL] ' + str(err))
def readDataFrame(filepath):
    df = pd.read_csv(filepath)
    # The last column holds the labels, which leaves an even number of feature columns
    nb_col = len(df.columns)
    mid = (nb_col - 1) // 2
    Y = df.iloc[:, -1]        # Interacts or not? Labels
    X1 = df.iloc[:, :mid]     # First protein representations
    X2 = df.iloc[:, mid:-1]   # Second protein representations
    # The respective order of the labels MUST be conserved.
    # Data must be converted into a vector processable by sklearn;
    # based on what I've read on conjoint triads, it should be the case already.
    return (X1, X2, Y)
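# A minimal sketch of feeding the returned frames to sklearn: stack the two protein
# representations into one feature matrix, assuming every feature column is numeric.
# The csv path below is only a placeholder.
import numpy as np

X1, X2, Y = readDataFrame("interactions.csv")
X = np.hstack([X1.values, X2.values])  # one flat feature vector per protein pair
y = Y.values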
def height(carats, height_range, vec, min_height, safety_step):
    df = pd.read_csv(carats, names=('time', 'airplanename', 'y', 'x', 'z', 'keisiki'))
    plane_list = list(df['airplanename'])
    plane_name = list(set(plane_list))
    listed_data = []
    for p in plane_name:
        plane_sort = df[df['airplanename'] == p]
        plane_sort = plane_sort.sort_values(by=["time"], ascending=True)
        rows = []
        for i, row in plane_sort.iterrows():
            rows.append(row)
        # The first 2*safety_step points have no history to compare against
        for i in range(safety_step * 2):
            if i >= len(rows):
                break
            row = rows[i]
            judge = False
            listed_data.append(list(row) + [judge])
        for i in range(safety_step * 2, len(rows)):
            row = rows[i]
            i1 = rows[i - safety_step]
            i2 = rows[i - safety_step * 2]
            ii = row
            judge = True
            if not (abs(ii['z'] - i1['z']) < height_range and abs(ii['z'] - i2['z']) < height_range):
                judge = False
            else:
                v1 = cp.vector(i1['x'] - i2['x'], i1['y'] - i2['y'])
                v2 = cp.vector(ii['x'] - i1['x'], ii['y'] - i1['y'])
                v1.normalization()
                v2.normalization()
                dot = v1.dot(v2)
                if vec >= dot:
                    judge = False
                elif ii['z'] <= min_height:
                    judge = False
            listed_data.append(list(row) + [judge])
    return listed_data
def __init__(self, data_path, learning_rate=0.001, reward_decay=0.9, e_greedy=0.9):
    self.learning_rate = learning_rate
    self.gamma = reward_decay
    self.features = pd.read_csv(data_path, sep=',')  # needs preprocessing
    self.epsilon = e_greedy
    self.step_index = 0
    self.cost = []
    self.memory_count = 0
    self.INITIAL_EPSILON = e_greedy
    self.FINAL_EPSILON = 0.01
    self.BATCHSIZE = 20
    self.EXPLORE = 300000
    self.OBSERVE = 1000
    self.MAXSTEP = 100000
    self.MAXSIZE = 9000
    self.DEPTH = 3
    self.replay_buffer = deque()
    # pd.Dataframe is not a pandas attribute; build the list of tables with pd.DataFrame
    self.q_table = [pd.DataFrame(), pd.DataFrame()]
def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
    if target_symbol is not None:
        return [
            StockDataSet(target_symbol, input_size=input_size, num_steps=num_steps,
                         test_ratio=test_ratio)
        ]

    # Load metadata of S&P 500 stocks (a constituents file must be prepared beforehand)
    info = pd.read_csv("data/constituents-financials.csv")
    info = info.rename(
        columns={col: col.lower().replace(' ', '_') for col in info.columns})
    info['file_exists'] = info['symbol'].map(
        lambda x: os.path.exists("data/{}.csv".format(x)))
    print(info['file_exists'].value_counts().to_dict())

    info = info[info['file_exists'] == True].reset_index(drop=True)
    # DataFrame.sort() was removed; sort_values() is the current API
    info = info.sort_values('market_cap', ascending=False).reset_index(drop=True)

    if k is not None:
        info = info.head(k)

    print("Head of S&P 500 info:\n", info.head())

    # Generate embedding metadata file
    info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)

    return [
        StockDataSet(row['symbol'], input_size=input_size, num_steps=num_steps,
                     test_ratio=0.05)
        for _, row in info.iterrows()
    ]
def k_means_pp():
    np.random.seed(0)
    args = command_line_arg()
    K, N, d, MAX_ITER, filename = args.K, args.N, args.d, args.MAX_ITER, args.filename
    observation_file = pd.read_csv(filename)
    observation_file = observation_file.values.tolist()
    observation_list = []
    for x in observation_file:
        obs = observation(x)
        observation_list.append(obs)

    found_centroid = []
    # Step 1 - choose the first centroid uniformly at random
    first_seed = np.random.choice(N)
    # Following steps - weight each next choice by the distances to the centroids found so far
    for j in range(2, K):
        found_centroid.append(observation_list[first_seed])
        calculate_d(found_centroid, observation_list)
        p_vector = create_p_vector(observation_list)
        first_seed = np.random.choice(N, p=p_vector)
    found_centroid.append(observation_list[first_seed])
    return found_centroid
import pandas as pd

data_infile = '../40-pn-dataset-auto.csv'
data = pd.read_csv(data_infile)
# -*- coding:utf-8 -*-
# /usr/bin/python
'''
Author: Yan Errol
Email: [email protected]
Wechat: qq260187357
Date: 2019-05-13--11:26
File:
Describe: correlation analysis of the data
'''
print(__doc__)

import pandas as pd
import seaborn as sns

# Step 0 - Read the dataset, calculate column correlations and make a seaborn heatmap
data = pd.read_csv(
    'https://raw.githubusercontent.com/drazenz/heatmap/master/autos.clean.csv')
corr = data.corr()
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
import pandas

# Note: the file has a .json extension but is read here as tab-separated text
data1 = pandas.read_csv('/home/rikesh/Citytech/Self/test/test.json', sep="\t")
print(data1)
def textprocessing():
    commentList = []
    dateList = []
    for i in range(10):
        num = i + 1
        [commentList_temp, dateList_temp] = getComments(num)
        commentList.append(commentList_temp)
        dateList.append(dateList_temp)
    commentList = reduce(operator.add, commentList)
    dateList = reduce(operator.add, dateList)

    dataTmp = {'comments': commentList[:], 'date': dateList[:]}
    df2 = pd.DataFrame(dataTmp)
    pd.DataFrame(df2).to_excel("text-movie.xls", sheet_name="sheet1", index=False, header=True)

    comments = ''
    for k in range(len(commentList)):
        comments = comments + (str(commentList[k])).strip()

    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    filterdata = re.findall(pattern, comments)  # strip punctuation with a regular expression
    cleaned_comments = ''.join(filterdata)

    seg_list_exact = jieba.cut(cleaned_comments, cut_all=False)  # accurate-mode word segmentation
    object_list = []
    remove_words = pd.read_csv("stopwords.txt", index_col=False, quoting=3,
                               sep="\t", names=['stopword'], encoding='utf-8')
    stopwords = set(remove_words['stopword'])  # membership tests against a DataFrame check columns, not values
    for word in seg_list_exact:      # loop over every segmented word
        if word not in stopwords:    # keep it if it is not a stopword
            object_list.append(word)

    # Word-frequency statistics
    word_counts = collections.Counter(object_list)
    word_counts_top10 = word_counts.most_common(10)  # ten most frequent words
    print(word_counts_top10)

    # Word-cloud display
    mask = np.array(Image.open('background.jpg'))  # background image for the cloud
    wc = wordcloud.WordCloud(
        background_color='white',                                # background colour
        font_path='/System/Library/Fonts/Hiragino Sans GB.ttc',  # font
        mask=mask,                                               # background mask
        max_words=200,                                           # maximum number of words shown
        max_font_size=100,                                       # maximum font size
        scale=32                                                 # higher scale = sharper image
    )
    wc.generate_from_frequencies(word_counts)            # build the cloud from the frequency dict
    image_colors = wordcloud.ImageColorGenerator(mask)   # colour scheme taken from the background image
    wc.recolor(color_func=image_colors)                  # recolour the cloud with that scheme
    wc.to_file("/Users/ownpro/Desktop/temp.jpg")         # save the image
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
import pandas as pd

df = pd.read_csv('pokemon.csv')
print(df.head(3))
print(df.tail(3))

df1 = pd.read_excel('pokemon.xlsx')

print(df.columns)                 # headers
df['name']
df[['name', 'type']]
df.iloc[1:4]                      # rows 1 to 3
df.iloc[2, 1]                     # third row, second column

for index, row in df.iterrows():
    print(index, row)             # show rows by index
for index, row in df.iterrows():
    print(index, row['Name'])     # show only the Name column

df.loc[df['type 1'] == 'fire']    # find rows matching specific textual information
df.describe()                     # mean and other summary statistics
df.sort_values('name')            # sort values by name
df.sort_values(['name', 'hp'], ascending=False)  # sort values by two columns
import numpy as np
import pandas as pa
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (20.0, 10.0)

data = pa.read_csv('headerbrain.csv')
print(data.shape)
data.head()
import pandas as pd

pd.read_csv("../data/gapminder.tsv", sep='\t')  # read the file using the tab separator
def clear_na_from_csv(csv_source, csv_dest):
    existing_csv = pd.read_csv(csv_source)
    csv_without_na = existing_csv.dropna()
    csv_without_na.to_csv(csv_dest)
def dice_coeff(y_true, y_pred, smooth=1.0):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss

def bce_dice_loss(y_true, y_pred):
    loss = binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
    return loss

### Load Data ###

### Read CSV
df_trn_val = pd.read_csv('Kaggle_Car_Data/train/train_masks.csv')

### Extract IDs
ids_trn_val = df_trn_val['img'].map(lambda s: s.split('.')[0])

### Train Data IDs and Validation Data IDs
ids_train_split, ids_valid_split = train_test_split(ids_trn_val, test_size=0.2, random_state=42)
print('Training on {} samples'.format(len(ids_train_split)))
print('Validating on {} samples'.format(len(ids_valid_split)))  # 4070 vs. 1018

### Paths
kaggle_train_path = 'Kaggle_Car_Data/train/train'
kaggle_train_mask_path = 'Kaggle_Car_Data/train_masks/train_masks'
kaggle_test_path = 'Kaggle_Car_Data/test/test'
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def get_title_from_index(index):
    return df[df.index == index]["title"].values[0]

def get_index_from_title(title):
    return df[df.title == title]["index"].values[0]

df = pd.read_csv("")

features = ['keywords', 'cast', 'genres', 'director']
for feature in features:
    df[feature] = df[feature].fillna('')

def combine_features(row):
    try:
        return row['keywords'] + " " + row['cast'] + " " + row['genres'] + " " + row['director']
    except:
        print("Error", row)

df["combined_features"] = df.apply(combine_features, axis=1)
#print(df["combined_features"].head())

cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])
cosine_simi = cosine_similarity(count_matrix)
from time import sleep
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd

df = pd.read_csv("https://covid19.who.int/WHO-COVID-19-global-table-data.csv")
df = df.drop([
    'WHO Region', 'Cases - cumulative total per 100000 population',
    'Cases - newly reported in last 7 days per 100000 population',
    'Deaths - cumulative total per 100000 population',
    'Deaths - newly reported in last 7 days',
    'Deaths - newly reported in last 7 days per 100000 population',
    'Transmission Classification'
], axis=1)
df = df.set_index('Name')
df['Recovered'] = df['Cases - cumulative total'] - df['Deaths - cumulative total']
df = df.drop(
    ['Cases - cumulative total', 'Deaths - newly reported in last 24 hours'],
    axis=1)

inp = input("Enter the country: ")
df.loc[[inp]]

import matplotlib.pyplot as plt

recovered = df.at[inp, 'Recovered']
Deaths = df.at[inp, 'Deaths - cumulative total']
'''Intro to ML'''
import pandas as pd

# read data and store it in a DataFrame
data = pd.read_csv(csv_file_path, index_col='Id')
# print summary statistics of the data
data.describe()
# print the first few rows of the data
data.head()
# print the list of columns in the dataset
data.columns
# drop missing values
data.dropna(axis=0)
# selecting the prediction target with dot-notation -- stored as a Series
y = data.Target
# selecting features with a column list
features = ['f_1', 'f_2', ...]
X = data[features]

'''Build ML models with scikit-learn (sklearn) for DF data
0) Split training data into training and validation data
   - the validation data measures the model's accuracy
   - once a model is selected, predict on the testing data
1) Define the model
2) Fit the model and make predictions (see the sketch below)
'''
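# A minimal sketch of the split / define / fit / predict steps above, assuming the X and y
# built earlier are numeric and using a DecisionTreeRegressor purely as an example model.
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)  # 0) split
model = DecisionTreeRegressor(random_state=1)                            # 1) define
model.fit(train_X, train_y)                                              # 2) fit
val_predictions = model.predict(val_X)                                   #    predict on validation data
print(mean_absolute_error(val_y, val_predictions))                       #    measure accuracy with MAE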
import pandas as pd

dataset = pd.read_csv('225245.csv')
import panda as pd df = pd.read_csv("temperatures.csv") df.head(2) #(2) stands for how many rows you want it to show df.head() # shows only the head list df.tail() # df.values df["temperature"] df["day"].head() df["temperature"] > 20 # shows all the temperatures below 20 degrees from the temperatures.csv file df["temperature"] < 0 # shows all the temperatures below 0 degrees from the file df_cool = df[df["temperature"] < 0] df_cool.head() df_cool.to_csv("cool.csv") # saves a new csv file named cool.csv #save stuff into a new file df["temperature"].mean() # this shows the average df["temperature"].max() # this shows the maximum temperature df["temperature"].min() # this shows the minimum temperature df["temperature"].value_counts( ) # it counts how many times a certain value occurs in a row/list df["temperature"].value_counts().head() snacks = pd.Series(["Mars", "Twix", "Oreo"])
import pandas as pd

df = pd.read_csv('C:/Users/iamay/Desktop/PY/data/iris.csv')
x = df['Species']
x.unique()
a = list(x)
import pandas as pd
from gensim.models import Word2Vec
import gensim, logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Word2Vec pre-training
data = pd.read_csv('../user_data/tmp_data/all_data_txt.txt', header=None)
w2v = Word2Vec(data[0].apply(lambda x: x.split(' ')).tolist(), size=128, window=8,
               iter=50, min_count=2, sg=1, sample=0.002, workers=6, seed=1018)
# Save the model
w2v.wv.save_word2vec_format('../user_data/pretraining_model/w2v_128.txt')

## Transformer pre-training -----------------------
from transformers import BertTokenizer, WEIGHTS_NAME, TrainingArguments
from model.modeling_nezha import NeZhaForSequenceClassification, NeZhaForMaskedLM
from model.configuration_nezha import NeZhaConfig
import tokenizers
from datasets import load_dataset, Dataset
from transformers import (
    CONFIG_MAPPING,
    MODEL_FOR_MASKED_LM_MAPPING,
    AutoConfig,
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    set_seed,
)
# importing required libraries
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from collections import Counter
import csv
import os
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# reading the dataset
data = pd.read_csv('/root/task5/pro.csv')

# dropping the unuseful column and missing values
data = data.dropna()
data = data.drop(['url'], axis='columns')

ip = data['IP']
count = Counter(ip)

# scaling the dataset
sc = StandardScaler()
data_scaled = sc.fit_transform(data)

model = KMeans(n_clusters=4)

# fitting the model
model.fit(data_scaled)
pred = model.fit_predict(data_scaled)

dataset_scaled = pd.DataFrame(data_scaled, columns=['IP', 'c'])
dataset_scaled['cluster'] = pred
data['cluster'] = pred

# plotting the clusters
f1 = data[data.cluster == 0]
f2 = data[data.cluster == 1]
# IMPORTING LIBRARIES
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import model_selection
import matplotlib.pyplot as plt

df = pd.read_csv('california_housing_test')
df.head()

y = df['total_rooms']
x = df[['longitude', 'latitude', 'housing_median_age']]

def CasesReg(x, y):
    reg = LinearRegression()
    reg.fit(x, y)
    return reg

x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y)
reg = CasesReg(x_train, y_train)
y_pred = reg.predict(x_test)
y_pred
reg.score(x_test, y_test)

plt.plot(x, reg.predict(x), '*')
plt.legend(labels=['longitude', 'latitude', 'housing_median_age'])
import pandas as pd
import numpy as np

# read in data
train = pd.read_csv()
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt

votes = []
for line in open("ELECTION_ID"):
    year = line.split(" ")[0]
    header = pd.read_csv(year + ".csv", nrows=1).dropna(axis=1)
    d = header.iloc[0].to_dict()
    df = pd.read_csv(year + ".csv", index_col=0, thousands=",", skiprows=[1])
    df.rename(inplace=True, columns=d)
    df.dropna(inplace=True, axis=1)
    df["Year"] = int(year)  # take the election year from the file name rather than a constant
    votes.append(df)

for year in range(len(votes)):
    majorVote = pd.concat([votes[year][['Democratic', 'Republican', 'Total Votes Cast', 'Year']]],
                          axis=1).head(1)
    majorVote['Republican Vote Share'] = majorVote['Republican'] / majorVote['Total Votes Cast']
    if year == 0:
        voteShare = pd.concat([majorVote], axis=1)
    else:
        voteShare = pd.concat([voteShare, majorVote], axis=0)

ax = voteShare.plot(x='Year', y="Republican Vote Share",
                    title="President General Election Results in Accomack County, Virginia")
import pandas as pd
import sklearn

# read in the dataset
data = pd.read_csv('Dateset.csv')
data.head()

# create an indicator column for overpayment
data['Overpayment_ind'] = (data.Overpayment_Amount != ' ').astype(int)
data.columns

# construct the design matrix
from patsy import dmatrices
y, X = dmatrices('Overpayment_ind ~ Age_in_yrs + Income + \
    Rent_Amount + Number_Children + C(Moved_from_out_of_state_12_mths) + \
    C(Citizenship) + C(Previous_Felony)', data, return_type='dataframe')
print(X.columns)

# rename indicator columns to a more readable form
X = X.rename(columns={'C(Moved_from_out_of_state_12_mths)[T.Y]': 'Moved_from_out_of_state_12_mths',
                      'C(Citizenship)[T.Verified]': 'Citizenship_Verified',
                      'C(Previous_Felony)[T.Y]': 'Previous_Felony'})
X.head()

import numpy as np
from sklearn.linear_model import LogisticRegression

ya = np.ravel(y)
model = LogisticRegression()
model = model.fit(X, ya)
from tensorflow.keras.layers import Dense       # Dense layer
from tensorflow.keras.models import Sequential  # whatever we do to the model happens in sequence
import pandas as pd

# get data
data = pd.read_csv('linear.csv', header=0, index_col=0)
#print(data.head())  # verify it's reading the correct info

indices = data.index.values
#print(indices)
values = data['value'].values  # we want the column "value" and its values
print(values)

model = Sequential()
model.add(Dense(8, input_shape=(1,)))  # input + first hidden layer: 8 nodes, 1 input feature
model.add(Dense(32))  # another layer with 32 nodes; grabs the shape of the previous layer
# LSTM - a model that allows you to pass in states...
model.add(Dense(1))   # OUTPUT LAYER = number of predictions you want

# compile the model with an optimizer (adam - minimizes error) and a loss function (mae - mean absolute error)
model.compile(optimizer='adam', loss='mae')

# epochs = 1 iteration through the entire data set
# batch size = send 1 sample at a time and update the weights
model.fit(indices, values, epochs=1, batch_size=1)
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout

previsores = pd.read_csv('entradas-breast.csv')
classe = pd.read_csv('saidas-breast.csv')

classificador = Sequential()
classificador.add(
    Dense(units=8, activation='relu', kernel_initializer='normal', input_dim=30))
classificador.add(Dropout(0.2))
classificador.add(
    Dense(units=8, activation='relu', kernel_initializer='normal'))
classificador.add(Dropout(0.2))
classificador.add(Dense(units=1, activation='sigmoid'))
classificador.compile(optimizer='adam', loss='binary_crossentropy',
                      metrics=['binary_accuracy'])
classificador.fit(previsores, classe, batch_size=10, epochs=100)

classificador_json = classificador.to_json()
with open('classificador_breast.json', 'w') as json_file:
    json_file.write(classificador_json)
classificador.save_weights('classificador_breast.h5')
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
default = pd.read_csv('default.csv')

# Inspect the dataset
default.head()

# Convert the student attribute into 0/1 dummy variables
default = pd.get_dummies(default, drop_first=True)

# Set the RNG seed so that we get reproducible results;
# splitting is a random process, and the seed value itself doesn't really matter
np.random.seed(0)

# Grab our input and output data
x_data = default[default.columns.difference(['default'])]
y_data = default['default']

# We can now split this into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3)

# Initialize the model without changing any of the default parameters
model = LogisticRegression()

# Fit the model on our training data
model.fit(x_train, y_train)
mat[3:5, :]
.sum()          # sum of all the values in mat
.std()          # standard deviation of the values in mat
.sum(axis=0)    # sum of each column in mat
.sqrt(arr)
.mean()
.min()
.max()
.count()
.exp(arr)
.describe()
.transpose()
.info()
.sin(arr)
.log(arr)

# NumPy supports the usual element-wise operators: * / + - < > <= >= !=
# (a short sketch of these operators follows at the end of these notes)

# Pandas converts data of different types into tables and provides advanced
# functions to filter and remap the data.
import pandas as pd

# read and write csv files
df = pd.read_csv('file_name')
df.to_csv('example', index=False)

# Excel input and output; beware of images in the excel file, they may cause a crash
pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')
df.to_excel('excelname.xlsm', sheet_name='Sheet1')

# HTML input
df = pd.read_html('http://.....html')

# read a database with SQL
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('data', engine)
sql_df = pd.read_sql('data', con=engine)

# convert to a DataFrame
df = pd.DataFrame(np.random.rand(5, 4), index='A B C D E'.split(), columns='W X Y Z'.split())
df['W']            # call one column
df[['W', 'Z']]     # call more than one column
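# A minimal sketch of the element-wise NumPy operators listed above, on a throwaway example array.
import numpy as np

arr = np.array([1, 2, 3, 4])
print(arr * 2)     # [2 4 6 8]
print(arr + arr)   # [2 4 6 8]
print(arr > 2)     # [False False  True  True]
print(arr != 3)    # [ True  True False  True]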