import sys # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) both_df = DfLoad.eng_both() run_analyses([ { 'name': 'has_objc', 'df': both_df, 'index': 'eng_TAMsimp', 'columns': 'has_objc', 'examples': [], }, { 'name': 'has_loca', 'df': both_df, 'index': 'eng_TAMsimp', 'columns': ['has_loca'], 'examples': [], }, { 'name': 'has_time', 'df': both_df, 'index': 'eng_TAMsimp', 'columns': ['has_time'], 'examples': [],
import sys # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) eng_df = DfLoad.eng_agree() # features needed for selections main_genre = ['prose', 'poetry', 'prophetic'] main_dom = ['Q', 'N'] run_analyses([ { 'name': 'clause_type', 'df': eng_df, 'index': 'eng_TAM', 'columns': 'clause_type', }, { 'name': 'clause_rela', 'df': eng_df, 'index': 'eng_TAM', 'columns': 'clause_rela', }, { 'name': 'clause_rela', 'df': eng_df, 'index': 'eng_TAM',
import sys import pandas as pd # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) agg_df = DfLoad.eng_simp_agree() esv_df = DfLoad.esv() niv_df = DfLoad.niv() eng_df = DfLoad.eng_both() disag_df_simp = DfLoad.eng_simp_disagree() disag_df = DfLoad.eng_disagree() def sum_top_values(df): """Sums the top values of dataframes.""" top = 0.015 pr_df = df / df.sum() top_pr = pr_df.loc[pr_df['sum'] >= top] top_ct = df.loc[top_pr.index] top_sum = pd.DataFrame(top_ct.sum()) sum_pr = pd.DataFrame(top_pr.sum()) data = { 'top_ct': top_ct, 'top_pr': top_pr, 'top_sum': top_sum, 'top_sum_pr': sum_pr,
import sys # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) eng_df = DfLoad.eng_agree() esv_df = DfLoad.esv() niv_df = DfLoad.niv() both_df = DfLoad.eng_both() disag_df = DfLoad.eng_disagree() run_analyses([ { 'name': 'eng_tenses', 'df': eng_df, 'index': 'eng_TAM', }, { 'name': 'esv_tenses', 'df': esv_df, 'index': 'esv_TAM', }, { 'name': 'niv_tenses', 'df': niv_df, 'index': 'niv_TAM', }, {
"""Scratch script: load the qatal dataframes and print a quick sanity check.

Prints the shape and head of the translation-disagreement dataframe, then a
cross-tabulation of `eng_TAM` against `has_objc` built from the
translation-agreement dataframe.
"""
import pandas as pd
from load_dfs import DfLoader

# NOTE(review): machine-specific absolute path; this script only runs where
# this results directory exists.
DfLoad = DfLoader(
    '/Users/cody/github/CambridgeSemiticsLab/Gesenius_data/results/csv/qtl')

# Load every dataframe variant. Not all of them are used below, but loading
# them here also exercises each loader method.
qatal_df = DfLoad.df_safe()
eng_df = DfLoad.eng_agree()
esv_df = DfLoad.esv()
niv_df = DfLoad.niv()
disag_df = DfLoad.eng_disagree()

print(disag_df.shape)
print(disag_df.head())

# Count rows for every (eng_TAM, has_objc) combination. This assignment was
# previously commented out while the final `print(test)` was left in place,
# so the script always ended with a NameError.
test = pd.pivot_table(
    eng_df,
    index='eng_TAM',
    columns='has_objc',
    aggfunc='size',
    fill_value=0,
    # dropna=False,
)

print(test)
import sys # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) esv_df = DfLoad.esv() # features needed for selections main_genre = ['prose', 'poetry', 'prophetic'] main_dom = ['Q', 'N'] run_analyses( [ { 'name': 'clause_type', 'df': esv_df, 'index': 'esv_TAM', 'columns': 'clause_type', }, { 'name': 'clause_rela', 'df': esv_df, 'index': 'esv_TAM', 'columns': 'clause_rela', }, { 'name': 'cltype_simp', 'df': esv_df,
import sys # NB snakemake runs script from /workflow directory sys.path.append('scripts/analysis') from load_dfs import DfLoader from analysis import run_analyses # load the dataframes DfLoad = DfLoader(snakemake.input.data_dir) niv_df = DfLoad.niv() # features needed for selections main_genre = ['prose', 'poetry', 'prophetic'] main_dom = ['Q', 'N'] run_analyses([ { 'name': 'clause_type', 'df': niv_df, 'index': 'niv_TAM', 'columns': 'clause_type', }, { 'name': 'clause_rela', 'df': niv_df, 'index': 'niv_TAM', 'columns': 'clause_rela', }, { 'name': 'cltype_simp', 'df': niv_df,