def worker(inpt):
    print(inpt)
    offset, chunksize, path = inpt
    df, meta = pyreadstat.read_sav(path, row_offset=offset, row_limit=chunksize)
    # df, meta = pyreadstat.read_file_in_chunks(pyreadstat.read_sav, path, offset=offset, chunksize=chunksize,
    #                                           multiprocess=True, num_processes=10)
    return df
def worker(inpt):
    import pyreadstat
    offset, chunksize, path = inpt
    df, meta = pyreadstat.read_sav(path, row_offset=offset, row_limit=chunksize)
    return df
def extract_sav_data(sav_file, ioLocale='en_US.UTF-8', ioUtf8=True, engine='savReaderWriter'):
    """ see parse_sav_file doc """
    if engine == 'savReaderWriter':
        with sr.SavReader(sav_file, returnHeader=True, ioLocale=ioLocale,
                          ioUtf8=ioUtf8) as reader:
            thedata = [x for x in reader]
        header = thedata[0]
        dataframe = pd.DataFrame.from_records(thedata[1:], coerce_float=False)
        dataframe.columns = header
        for column in header:
            if dataframe[column].dtype == object:
                # Replace None with NaN because SRW returns None if casting dates
                # fails (dates come back as object dtype, cast to str by SRW)
                values = dataframe[column].dropna().values
                if len(values) > 0 and isinstance(values[0], str):
                    dataframe[column] = dataframe[column].dropna().map(str.strip)
                    # creating DATETIME objects should happen here
        return dataframe
    elif engine == 'readstat':
        df, meta = pyreadstat.read_sav(sav_file)
        return df
def get_data(filename, survey_name, variables):
    variables = get_lit_variables(survey_name, variables)
    df, meta = pyreadstat.read_sav(filename, apply_value_formats=True, usecols=variables)
    df['survey'] = survey_name
    df.columns = df.columns.str.upper()
    return [df, meta]
def spss_to_csv(source_filename, dest_fileobj):
    df, meta = pyreadstat.read_sav(source_filename)
    df.to_csv(
        dest_fileobj,
        float_format='%g',
        index=False,
        line_terminator='\r\n',
    )
def scdb_sav_to_dataframe(scdb_sav_path):
    try:
        dataset = pd.read_spss(scdb_sav_path)
    except (PyreadstatError, ReadstatError):
        dataset, _ = pyreadstat.read_sav(scdb_sav_path,
                                         apply_value_formats=True,
                                         encoding='iso-8859-1')
    return dataset
def read_exportCSV(filepath: str):
    '''Only .sav files are handled here.'''
    # ---- sav
    if filepath.endswith('sav'):
        try:
            dt, meta = pyreadstat.read_sav(filepath)
        except Exception:
            try:
                dt, meta = pyreadstat.read_sav(filepath, encoding='Big5-HKSCS')
            except Exception as e:
                return {filepath: e}

        # labels
        col = dt.columns
        value_lab = []
        # collect and fill in the value labels
        for i in col:
            try:
                target = meta.variable_value_labels[i]
                target = {str(key): target[key] for key in target}
                str1 = str(target)
                # str1.replace(',', ', \n')
            except Exception:
                str1 = ''
            value_lab.append(str1)

        labelDF = pd.DataFrame({
            'col_name': col,
            'col_lab': meta.column_labels,
            'val_lab': value_lab
        })
        labelDF.to_csv(re.sub(r"(?<=\.).*", 'label.csv', filepath),
                       encoding='utf_8_sig', index=False)
        dt.to_csv(re.sub(r"(?<=\.).*", 'csv', filepath),
                  encoding='utf_8_sig', index=False)
def feed(self):
    dataframe, meta = pyreadstat.read_sav(self.data_file)
    data = dataframe.drop(dataframe.columns[269:525], axis=1).drop(dataframe.columns[526:], axis=1)
    weights = dataframe.drop(dataframe.columns[1:268], axis=1)
    # df(dataframe).to_sql("raw_data", self.engine, if_exists='replace', index=False)
    DataFrame(data).to_sql("questionnaire_data", self.engine, if_exists='replace', index=False)
    DataFrame(weights).to_sql("weights_data", self.engine, if_exists='replace', index=False)
def iso_farest(test_data, max_samples):
    df, meta = pyreadstat.read_sav("/Users/alireza/project/DMTM/flask/er0827t.sav")
    for column in df:
        if df[column].isnull().values.all():
            df.drop(columns=column, axis=1, inplace=True)
        else:
            labels = imp.imputation(df[column], "mean")
            df[column] = pd.DataFrame(labels)

    rng = np.random.RandomState(42)
    outlier_df = []
    for column in df:
        if df[column].isnull().values.all():
            df.drop(columns=column, axis=1, inplace=True)
        else:
            labels = imp.imputation(df[column], "mean")
            df[column] = pd.DataFrame(labels)
            mean = df[column].mean()
            std = df[column].std()
            tmp = rng.uniform(low=mean + 5 * std, high=10 * std, size=(100, 1))
            outlier_col = []
            for data in tmp:
                outlier_col.append(data[0])
            outlier_df.append(outlier_col)
    outlier_df = pd.DataFrame(outlier_df).transpose()
    # df_all = pd.DataFrame(np.concatenate((df.values, outlier_df.values), axis=0))
    # df_all.columns = df.columns

    train, test = train_test_split(df, test_size=0.2)
    # train = df[:9999]
    # test = df[10000:]
    # anomaly = outlier_df

    # training the model
    clf = IsolationForest(max_samples=max_samples, random_state=rng)
    clf.fit(train)
    y_pred_test = clf.predict(test_data)
    # y_pred_outliers = clf.predict(anomaly)

    # new, 'normal' observations ----
    # normal_accuracy = (list(y_pred_test).count(1) / y_pred_test.shape[0])
    # outliers ----
    # outlier_accuracy = (list(y_pred_outliers).count(-1) / y_pred_outliers.shape[0])
    return y_pred_test
def get_data_with_filename(file):
    """Get metadata and data from the .sav file.

    Args:
        file (str): .sav filename.

    Returns:
        data, meta (tuple): data and metadata, or (None, None) if the file is missing.
    """
    try:
        data, meta = pyreadstat.read_sav(file, apply_value_formats=True,
                                         encoding="ISO-8859-1")
        return data, meta
    except FileNotFoundError:
        print("Need to investigate ", file)
        return None, None
def main():
    print('Loading data...')

    # load raw data
    df, meta = pyreadstat.read_sav(raw_file)
    raw_df = df[['caseid'] + lib_qs + con_qs + demographics]

    # give each text response its own row
    df = get_unique_text(raw_df)

    # load coded data
    data = pd.read_csv(coded_file, index_col=0, sep='\t')
    data['userid'] = [str(uid) for uid in data['userid']]

    # keep just the columns we need
    data = data[['userid', 'topic', 'position', 'authentic']]

    # merge data
    all_data = data.merge(df, on=['userid', 'topic', 'position'], how='inner')
    print('%s observations loaded.' % len(all_data))

    features, networks = get_features(all_data)

    # merge features into existing df
    merged_df = all_data.merge(features, on='rid', how='inner')

    # reindex to make life easier
    df, networks = reindex(merged_df, networks)

    # write data to file
    print('Writing features and survey data to file.')
    df.to_csv('yougov_data.txt', sep='\t')

    # calculate distances
    valid_pairs = get_pairs(df)
    distance_df = get_distances(valid_pairs, networks)
    distance_df.to_csv('distances.txt', sep='\t')
    print('Distance calculations saved to file.')
def parse_file(filename):
    # match on the file extension rather than a substring anywhere in the path
    name = filename.lower()
    if name.endswith(".sav"):
        df, meta = pyreadstat.read_sav(filename, apply_value_formats=True, metadataonly=True)
        return df, meta, False
    elif name.endswith(".por"):
        df, meta = pyreadstat.read_por(filename, apply_value_formats=True, metadataonly=True)
        return df, meta, False
    elif name.endswith(".sas7bdat"):
        df, meta = pyreadstat.read_sas7bdat(filename, metadataonly=True)
        return df, meta, False
    elif name.endswith(".xpt"):
        df, meta = pyreadstat.read_xport(filename, metadataonly=True)
        return df, meta, False
    elif name.endswith(".dta"):
        df, meta = pyreadstat.read_dta(filename, apply_value_formats=True, metadataonly=True)
        return df, meta, False
    else:
        return None, None, True
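# A hedged usage sketch for parse_file above; "survey.sav" is a hypothetical
# filename. With metadataonly=True the returned frame is empty, but the meta
# object still exposes column names and the row count.
df, meta, failed = parse_file("survey.sav")
if not failed:
    print(meta.column_names)
    print(meta.number_rows)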
#!/usr/bin/env python
# coding: utf-8

# In[54]:

import pandas as pd
import numpy as np
import pyreadstat as ps

# In[11]:

df, meta = ps.read_sav("D:/KOMSTAT/Pert2/komstat1.sav")

# In[193]:

df.head(10)

# In[15]:

df.shape

# In[70]:

df.columns

# In[117]:

df.dtypes

# In[118]:
            linewidth=1)
    if threshold is not None:
        ax.plot(x_grid, threshold * np.ones_like(stability_selection.lambda_grid),
                'b--', linewidth=0.5)
    ax.set_ylabel('Stability score')
    ax.set_xlabel('Lambda / max(Lambda)')
    fig.tight_layout()
    return fig, ax


df, meta = pyreadstat.read_sav("Final Data, May 3.sav")
imp = KNNImputer(missing_values=np.nan)
df = df.select_dtypes(include=['float32', 'float64', 'int'])
# df.insert(3, "num2", num2)
targetIndex = -1
df = df.iloc[pd.isna(df.iloc[:, targetIndex]).values == False, :]
# df = df.drop(columns=["Num1"])
imp.fit(df)
vars = df.columns[range(len(df.columns) - 1)]
df = imp.transform(df)
# df = pd.DataFrame(vals, columns=vars)
# df = df[["WBC0", "Plt0", "Mg0", "Age", "Ca0", "BMI", "Na0", "P0", "HB0", "AST0", "PH0",
#          "ALT0", "CRP0_Quantitative", "HeartFailure0", "Nausea0", "WeaknessFatigue0",
#          "Cough0", "K0", "PR0", "Cr0", "COVID19_outcome"]]
X = np.round(df[:, range(0, df.shape[1] - 1)])
Y = np.round(df[:, targetIndex])
                                   args=np.append(main_args, [(length * 7 // 8, length), res_8]))
    procs.append(proc_8)

    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    estimates = np.concatenate(
        (res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8))
    return estimates


if __name__ == '__main__':
    df, meta = pyreadstat.read_sav('conjoint_host_sim_dummy.sav')
    for n in range(1, 8):
        df[f'T{n}_select'] = df[f'T{n}_select'].astype(int)
    features = np.delete(
        np.unique(list(map(lambda x: x[x.rindex('_') + 1:], df.columns[2:]))), -1)
    df_diff = diff_model(df, features)

    with pm.Model() as logistic_model:
        pm.glm.GLM.from_formula(
            'target ~ {0}'.format(
                ' '.join(list(map(lambda x: str(x) + ' ' + '+', df_diff.columns[:-1])))[:-2]),
            data=df_diff,
            family=pm.glm.families.Binomial())
        trace_logistic_model = pm.sample(2000, step=pm.NUTS(), chains=1,
# df_run_sps = df_run.loc[df_run.Extension == ".sps"]
# df_run_xls = df_run.loc[df_run.Extension == ".xls"]
# df_run_xlsx = df_run.loc[df_run.Extension == ".xlsx"]
# df_run_xml = df_run.loc[df_run.Extension == ".xml"]
# df_run_csv = df_run.loc[df_run.Extension == ".csv"]

############################################################################
# Reading the .sav files
import pyreadstat

#df1, meta1 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2011\Decembre11\Données_FSMS_nov11_26_12.sav")
#df2, meta2 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2011\Janvier11\Données\FSMS_2011_4-2-11_URBAN.sav")
#df3, meta3 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2011\Janvier11\Données\FSMS_2011_RURAL_FINAL.sav")
df4, meta4 = pyreadstat.read_sav(
    r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\Mauritania FSMS data\2011\Juin11"
)
#df5, meta5 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2012\Analyse croise SA_NUT\RIM_FSMS_SMART_juil2012.sav")
#df6, meta6 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2012\Decembre\Donnes_FSMSdec12_HH_commun.sav")
df7, meta7 = pyreadstat.read_sav(
    r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2012\Juin\Données_FSMS_juil_12.sav"
)
#df8, meta8 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2013\Decembre\Données FSMS 13Dec_20_01_14.sav")
df9, meta9 = pyreadstat.read_sav(
    r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2013\Juin\FSMS_HH_juil13b_1.sav"
)
#df10, meta10 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2014\Decembre\Données_FSMS_24_06_15.sav")
df11, meta11 = pyreadstat.read_sav(
    r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2014\Juin\Données_FSMS_juin_2014.sav"
)
#df12, meta12 = pyreadstat.read_sav(r"C:\Users\simon\Documents\Simon\Data4Good\9_GeoWatch\2015\Decembre\Données FSMS Jan16_18_02.sav")
        print(df[feature_name].min())
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result


if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.stderr.write('Usage: "{0}" $csvFileName $IndexOfX1\n'.format(sys.argv[0]))
        os._exit(1)

    if ".csv" in sys.argv[1]:
        data = pd.read_csv(sys.argv[1])
    else:
        data, meta = pyreadstat.read_sav(sys.argv[1])

    # replace all spaces in column headers
    data.rename(columns=lambda name: name.replace(" ", "_"), inplace=True)
    # get column headers such as [keyX1, keyX2, keyX3, ..., keyY]
    keyList = data.columns.values[int(sys.argv[2]):]
    print("data read, table \n", data)

    #x = data.values  # returns a numpy array
    #min_max_scaler = preprocessing.MinMaxScaler()
    #x_scaled = min_max_scaler.fit_transform(x)
    #data = pd.DataFrame(x_scaled)
    data = normalize(data[keyList[1:]])
    print(data)
    data = data.iloc[:, 1:6].values
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 8 23:21:28 2019

@author: garci
"""
# CONVERT SPSS FILES (.SAV) TO A PYTHON DATAFRAME, THEN SAVE AS A .CSV FILE

import pyreadstat
import pandas as pd

# FILE
filename = 'experim.sav'
folder = 'data/'

df, meta = pyreadstat.read_sav(folder + filename)

# print dataframe df
print(df)

# convert the .sav file to .csv and place it in the script path
df.to_csv(filename[:-4] + '.csv')
    # list all files in folder
    list_all_files = []
    for path, subdirs, files in os.walk(root_folder):
        for name in files:
            list_all_files.append(os.path.join(path, name))
    # list data files
    list_data_file = [f for f in list_all_files if re.search(r'\.sav$', f)]
    return list_data_file


fsms_file = list_fsms_file()
data, meta = pyreadstat.read_sav(fsms_file[5],
                                 apply_value_formats=True,
                                 encoding="ISO-8859-1")


def clean_gps_coord(string):
    string = string.replace('<Point><coordinates>', '')
    string = string.replace('</coordinates></Point>', '')
    return string


def extract_latitude(string):
    lon = string.split(',', 2)[0]
    lat = string.split(',', 2)[1]
    return lat, lon
records_ = df.to_dict(orient='records')
r = json.dumps(records_)
loaded_r = json.loads(r)
who_say.insert_many(loaded_r)

# -------------- World Opinion Survey -------------------------------------------------------
import pandas as pd
import pyreadstat
import pymongo
from pymongo import MongoClient

client = MongoClient('localhost', 27017)

# --------- Import db from sav ---------------------------------------
df_WV2, meta_WV2 = pyreadstat.read_sav("WV2.sav", apply_value_formats=True)
df_WV3, meta_WV3 = pyreadstat.read_sav("WV3.sav", apply_value_formats=True)
df_WV4, meta_WV4 = pyreadstat.read_sav("WV4.sav", apply_value_formats=True)
df_WV5, meta_WV5 = pyreadstat.read_sav("WV5.sav", apply_value_formats=True)
df_WV6, meta_WV6 = pyreadstat.read_sav("WV6.sav", apply_value_formats=True)

# --------- call local host and create new data -----------------------
db = client.new_data
spss = db.spss

# --------- labels -----------------------------------------------------
df_WV2.columns = meta_WV2.column_labels
df_WV3.columns = meta_WV3.column_labels
df_WV4.columns = meta_WV4.column_labels
df_WV5.columns = meta_WV5.column_labels
    'NT1010', 'NT2010', 'NT1058', 'NT2058', 'NT1137', 'NT2137', 'NT1097',
    'NT2097', 'NT99999999', 'NT9999'
]

name_csv = 'GFC_SelfReport.sav'
os.chdir('/Users/paulsharp/Documents/Dissertation_studies/data')
df_full, meta = pyreadstat.read_sav(name_csv, user_missing=True,
                                    apply_value_formats=False)
GFC_Subs = []
for sub in df_full.NT_ID:
    sub = sub[-3:]
    GFC_Subs.append(sub)

# create conversion dictionary called power_bb from Power 264 to Big Brain 300 parcellation
os.chdir(path_to_subs)
with open('convert_Power_to_bigbrain.csv', 'r') as f:
    r = reader(f)
    lines = [l for l in r]
for row in lines:
import math
import threading
from time import time

import pandas as pd
import pyreadstat


def worker(inpt):
    print(inpt)
    offset, chunksize, path = inpt
    df, meta = pyreadstat.read_sav(path, row_offset=offset, row_limit=chunksize)
    # df, meta = pyreadstat.read_file_in_chunks(pyreadstat.read_sav, path, offset=offset, chunksize=chunksize,
    #                                           multiprocess=True, num_processes=10)
    return df


start_ts = time()

# calculate the number of rows in the file
_, meta = pyreadstat.read_sav("Surgery.sav", metadataonly=True)
numrows = meta.number_rows

# calculate number of cores in the machine, this could also be set manually to some number, i.e. 8
# calculate the chunksize and offsets
chunksize = 200
offsets = [indx * chunksize for indx in range(math.ceil(numrows / chunksize))]

# pack the data for the jobs
jobs = [(x, chunksize, "Surgery.sav") for x in offsets]

threads = []
max_threads = 30
while threads or jobs:
    for thread in threads:
        if not thread.is_alive():
            threads.remove(thread)
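# A minimal alternative sketch, assuming the worker, jobs, and max_threads defined
# above: plain threading.Thread cannot hand back the worker's return value, so
# concurrent.futures.ThreadPoolExecutor is used here to collect the chunk DataFrames.
from concurrent.futures import ThreadPoolExecutor

import pandas as pd

with ThreadPoolExecutor(max_workers=max_threads) as executor:
    chunks = list(executor.map(worker, jobs))
final = pd.concat(chunks, ignore_index=True)
print("elapsed:", time() - start_ts)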
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import matplotlib.pyplot as plt
import pyreadstat

df1, meta = pyreadstat.read_sav(
    '/Users/lazarus/galvanize/datasets/osfstorage-archive/CCAM SPSS Data.sav')
df1.to_csv('/Users/lazarus/galvanize/capstone_1/data/climate_survey_data.csv')
'''
end product: bar graph of each static demographic's % change in belief

static demographics:
'gender'__________2
'generation'______6
'income_category'_3
'race'____________4
'party_x_ideo'____6
SPECIAL CASE ***'religion'_15***
SPECIAL CASE ***'evangelical'_4***  # look up evangelical environmental movement
'''


class DemographicAnalysis:

    def __init__(self, df):
        self.df = df1[df1.year != 1]  # get rid of 2008 because it's weird

    def split_demographics(self, df):
        # columns_to_split = input('Which categories do you want to split? ')  # <---- interactive for future users
        columns_to_split = [
            'gender', 'generation', 'income_category', 'race', 'party_x_ideo'
        ]
        # choose which demographics to separate from their column categories
# Read Excel File
Data = pd.read_excel("PATH_TO_FILE(or)URL_FOR_THE_FILE/example.xls", sheet_name="Name")

# Read SAS File
Data = pd.read_sas('example.sas7bdat')

# Read Stata File
Data = pd.read_stata('example.dta')

# Read R Data File
result = pyreadr.read_r('C:/Users/sampledata.RData')
print(result.keys())  # let's check what objects we got
Data = result["df1"]  # extract the pandas data frame for object df1

# Read SQL Table from Sqlite3 with .db extension
conn = sqlite3.connect('C:/Users/Deepanshu/Downloads/flight.db')
query = "SELECT * FROM flight"
Data = pd.read_sql(query, con=conn)

# Read Data from SPSS File
Data, meta = pyreadstat.read_sav("file.sav", apply_value_formats=True)

"""Modules to be used based on the size of the data:
- Pandas - small datasets, up to 1 GB
- Dask   - medium datasets, up to XX GB
- Vaex   - large datasets, up to TBs
"""

# Example
data = pd.read_csv("https://raw.githubusercontent.com/Mineria/Titanic/master/csv/train.csv")

"""# Preprocessing

Basic preprocessing steps:
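# A minimal sketch of the Dask option mentioned above, assuming dask[dataframe] is
# installed; dask.dataframe mirrors the pandas read_csv API but builds a lazy task
# graph, so .compute() is needed to materialize results.
import dask.dataframe as dd

ddf = dd.read_csv("https://raw.githubusercontent.com/Mineria/Titanic/master/csv/train.csv")
print(ddf.shape[0].compute())  # row count is only computed on demand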
#######################################################################
##### This script converts SPSS (.sav) files to CSV ######
# 1) To do so, we read from the MinIO directory that hosts the `.sav` files
#    [/home/ubuntu/Rucas/data/sav/]
# 2) Then add the name of the file to be converted [replace `<filename>`]
# 3) Finally, assign a name to the file that will be saved as `.csv` [replace `<name>`]

import pyreadstat as prst

# 1) #################################################### 2) #########
df0, meta = prst.read_sav(
    '/home/ubuntu/Rucas/data/sav/W1 BdM BBDD HH 20 04 24.sav')

############# 3) #####################################################
df0.to_csv(
    '/home/ubuntu/Rucas/data/dir_path/csv/tab/W1 BdM BBDD HH 20 04 24.csv',
    sep=',',
    float_format='%g',
    encoding='utf-8',
    index=False)

# sep=',' and float_format='%g' drop the trailing decimals, e.g. '12.0' becomes '12'
# If this script cannot be run, see: http://ezcsv2sav.com/about/
import multiprocessing as mp

import pandas as pd
import pyreadstat


def worker(inpt):
    import pyreadstat
    offset, chunksize, path = inpt
    df, meta = pyreadstat.read_sav(path, row_offset=offset, row_limit=chunksize)
    return df


# calculate the number of rows in the file
_, meta = pyreadstat.read_sav("big.sav", metadataonly=True)
numrows = meta.number_rows

# calculate number of cores in the machine, this could also be set manually to some number, i.e. 8
numcores = mp.cpu_count()

# calculate the chunksize and offsets
divs = [
    numrows // numcores + (1 if x < numrows % numcores else 0)
    for x in range(numcores)
]
chunksize = divs[0]
offsets = [indx * chunksize for indx in range(numcores)]

# pack the data for the jobs
jobs = [(x, chunksize, "big.sav") for x in offsets]

pool = mp.Pool(processes=numcores)
# let's go!
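# A sketch of how the pool above could dispatch the jobs and reassemble the chunks,
# assuming the worker, jobs, and pool defined above (this mirrors the multiprocessing
# pattern from the pyreadstat documentation rather than quoting it verbatim).
chunks = pool.map(worker, jobs)
pool.close()
pool.join()
final = pd.concat(chunks, ignore_index=True)
print(final.shape)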
import pyreadstat
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from scipy.spatial import distance, distance_matrix
from sklearn.impute import SimpleImputer
import pandas as pd
from skbio import DistanceMatrix
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.utils import class_weight
from xgboost import XGBClassifier
from imblearn import pipeline as pl

df, meta = pyreadstat.read_sav("CMS.sav")
cols = df.columns
imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
df = df.select_dtypes(include=['float32', 'float64', 'int'])
imp.fit(df.values)
df = imp.transform(df.values)

CMS = np.round(df[:, -1])
# df = df.select_dtypes(include=['float32', 'float64', 'int'])
X = df[:, 0:df.shape[1] - 1:1]
CMS = CMS - 1
CMS[CMS == -1] = 1

Xd = pd.DataFrame(X)
duplicatedItem = Xd.duplicated(keep='first')
X = X[duplicatedItem == False, :]
Created on Fri Jun 19 14:45:18 2020

@author: hsuwei
"""

import os
import re
from pathlib import Path

import pandas as pd
import pyreadstat

os.chdir('C:/Users/user/Desktop/LifeHistory')

# input
df96, meta96 = pyreadstat.read_sav('./data/1996/tscs1996q2.sav',
                                   apply_value_formats=False,
                                   formats_as_category=False)
# import pandas, numpy, and pyreadstat
import pandas as pd
import numpy as np
import pyreadstat

pd.set_option('display.max_columns', 5)
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.width', 75)

# retrieve spss data, along with the meta data
nls97spss, metaspss = pyreadstat.read_sav('data/nls97.sav')
nls97spss.dtypes
nls97spss.head()
nls97spss['R0536300'].value_counts(normalize=True)

# use column labels and value labels
metaspss.variable_value_labels['R0536300']
nls97spss['R0536300'].\
  map(metaspss.variable_value_labels['R0536300']).\
  value_counts(normalize=True)
nls97spss = pyreadstat.set_value_labels(nls97spss, metaspss, formats_as_category=True)
nls97spss.columns = metaspss.column_labels
nls97spss['KEY!SEX (SYMBOL) 1997'].value_counts(normalize=True)
nls97spss.dtypes
nls97spss.columns = nls97spss.columns.\
  str.lower().\
  str.replace(' ', '_').\
  str.replace('[^a-z0-9_]', '')
nls97spss.set_index('pubid__yth_id_code_1997', inplace=True)

# apply the formats from the beginning
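# A plausible sketch of the step hinted at by the last comment, assuming the same
# data/nls97.sav file: read_sav can apply the value labels up front instead of
# calling set_value_labels afterwards.
nls97labeled, metaspss = pyreadstat.read_sav('data/nls97.sav',
                                             apply_value_formats=True,
                                             formats_as_category=True)
nls97labeled.columns = metaspss.column_labels
nls97labeled.head()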
import matplotlib.pyplot as plt
import pandas as pd
import pyreadstat

from pew_crosstab import CrossTab
from pew_bars import BarNums
from cramer_chi import ContTabs

w68spss, metaspss = pyreadstat.read_sav('../data/W68.sav',
                                        apply_value_formats=True,
                                        formats_as_category=True)

fields = [
    'COVIDFOL_W68', 'COVIDCOVER1_W68', 'COVIDFACTS_b_W68', 'COVIDNEWSCHNG_a_W68',
    'COVIDNEWSCHNG_c_W68', 'COVIDNEWSCHNG_d_W68', 'COVIDNEWSCHNG_e_W68',
    'COVIDINFODIFF_W68', 'COVIDLOCINFO_W68', 'COVIDDEAL_W68', 'COVIDPLANHRD_W68',
    'COVIDPLANTRUE_W68', 'COVIDPLANWATCH_W68', 'F_METRO', 'F_EDUCCAT', 'F_PARTY_FINAL'
]
facets = ['F_METRO', 'F_EDUCCAT', 'F_PARTY_FINAL']
answers = [
    'COVIDFOL_W68', 'COVIDCOVER1_W68', 'COVIDFACTS_b_W68', 'COVIDNEWSCHNG_a_W68',
    'COVIDNEWSCHNG_c_W68', 'COVIDNEWSCHNG_d_W68', 'COVIDNEWSCHNG_e_W68',
    'COVIDINFODIFF_W68', 'COVIDLOCINFO_W68', 'COVIDDEAL_W68', 'COVIDPLANHRD_W68',
    'COVIDPLANTRUE_W68', 'COVIDPLANWATCH_W68'
]

df68 = pd.DataFrame(w68spss)