コード例 #1
0
def baseline(setting='color'):
	"""Train and evaluate one linear SVM per label column on one feature type.

	Features are extracted for every image named in ``file_names.txt``, labels
	are read from ``image_annotations.csv``, the data is split 80/20 into
	train and test sets, and one ``svm.SVC`` is fitted per column of the label
	matrix.  Train and test predictions for each column are passed to
	``analysis.run_analyses``.

	Args:
		setting: Feature extractor to use: ``'color'`` (``color_histogram``),
			``'pixel'`` (``pixel_extractor``) or ``'bb'`` (``bb_extractor``).

	Raises:
		ValueError: If ``setting`` is not one of the supported values.
	"""
	# Fail fast: an unknown setting used to fall through to `pass`, leave X
	# unbound and only surface later as a NameError at the train/test split.
	if setting not in ('color', 'pixel', 'bb'):
		raise ValueError(
			"unknown setting %r; expected 'color', 'pixel' or 'bb'" % (setting,))

	# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
	# replacement and fall back for old installations.
	try:
		from sklearn.model_selection import train_test_split
	except ImportError:
		from sklearn.cross_validation import train_test_split

	# get features and labels
	img_names = get_filename_list('../data/groupdataset_release/file_names.txt')

	print("Extracting features...")
	if setting == 'color':
		X = color_histogram('../data/groupdataset_release/images', img_names)
	elif setting == 'pixel':
		X = pixel_extractor('../data/groupdataset_release/resize_images', img_names)
	else:  # 'bb' -- the only remaining value after the validation above
		X = bb_extractor('../data/groupdataset_release/annotations/all', img_names)
	Y = get_label_matrix('../data/groupdataset_release/image_annotations.csv')

	# split into train and test
	print("Splitting into train and test set...")
	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

	# one SVM per label column
	class_names = {'none': 1, 'low': 2, 'moderate': 3, 'high': 4}
	# NOTE(review): only three names are listed; a label matrix with more
	# columns would raise IndexError below -- confirm against the CSV.
	sentiments = ['interaction', 'focus', 'happiness']
	for i in range(Y_train.shape[1]):
		print("Fitting svm....")
		svm_model = svm.SVC(kernel="linear", decision_function_shape='ovr', max_iter=10000)
		svm_model.fit(X_train, Y_train[:,i])
		print("Predicting...")
		y_predict_train = svm_model.predict(X_train)
		y_predict = svm_model.predict(X_test)

		analysis.run_analyses(y_predict_train, Y_train[:,i], y_predict, Y_test[:,i], class_names, sentiments[i])
コード例 #2
0
from load_dfs import DfLoader
from analysis import run_analyses

# Load the dataframe returned by DfLoader.eng_both() from the Snakemake input.
DfLoad = DfLoader(snakemake.input.data_dir)
both_df = DfLoad.eng_both()

# Each spec pairs the 'eng_TAMsimp' index with one has_* column and requests
# no example queries.
run_analyses(
    [
        dict(name='has_objc', df=both_df, index='eng_TAMsimp',
             columns='has_objc', examples=[]),
        dict(name='has_loca', df=both_df, index='eng_TAMsimp',
             columns=['has_loca'], examples=[]),
        dict(name='has_time', df=both_df, index='eng_TAMsimp',
             columns=['has_time'], examples=[]),
    ],
    snakemake.output.dir,
)
コード例 #3
0
# NB snakemake runs script from /workflow directory
sys.path.append('scripts/analysis')
from load_dfs import DfLoader
from analysis import run_analyses

# Load the dataframe returned by DfLoader.eng_both() from the Snakemake input.
DfLoad = DfLoader(snakemake.input.data_dir)
both_df = DfLoad.eng_both()

# Verb-level specs, all indexed by 'eng_TAMsimp'; only 'verb_person' carries
# an (empty) 'examples' entry.
run_analyses(
    [
        dict(name='verb_stem', df=both_df, index='eng_TAMsimp',
             columns='stem'),
        dict(name='verb_person', df=both_df, index='eng_TAMsimp',
             columns='person', examples=[]),
        dict(name='is_stative', df=both_df, index='eng_TAMsimp',
             columns=['esv_is', 'niv_is']),
    ],
    snakemake.output.dir,
)
コード例 #4
0
ファイル: main.py プロジェクト: noticeable/VisualSentiment
def main():
    """Train, persist and evaluate one linear SVM per sentiment label.

    Builds the feature matrix with ``construct_full_feature_matrix``
    (``only_poselet=True``), loads labels from ``image_annotations.csv``,
    performs an 80/20 train/test split, then for every label column fits an
    ``svm.SVC``, dumps it under ``./final_svm_models`` and passes the train
    and test predictions to ``analysis.run_analyses``.

    The commented-out sections appear to be earlier one-off preprocessing
    steps; they are retained unchanged for reference.
    """
    import os

    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
    # replacement and fall back for old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    ##Torso Extraction##
    # img_path = "../data/groupdataset_release/images/4940922642_5dab04b030_o.jpg"
    # torso_extractor = TorsoExtractor()
    # torso_list, image = torso_extractor.detect_torsos(img_path)

    # ## Face Extraction ##
    # # img_path = "../data/groupdataset_release/images/Library3.jpg"
    # # face_extractor = FaceExtractor()
    # # faces_lists, image = face_extractor.detect_faces(img_path)
    # # for face_list in faces_lists:
    # # 	for (x,y,w,h) in face_list:

    # extract_faces = False
    # extract_missed_faces = False
    # if extract_faces:
    # 	src_path = '../data/GENKI-R2009a/Subsets/GENKI-4K/files'
    # 	dest_path = './cache/GENKI_faces'
    # 	image_util.extract_GENKI_faces(src_path, dest_path)

    # if extract_missed_faces:
    # 	src_path = '../data/GENKI-R2009a/Subsets/GENKI-4K/files'
    # 	dest_path = './cache/GENKI_faces/GENKI_faces_looser_bounds'
    # 	image_util.extract_missed_faces(dest_path)

    #SVM Training
    # img_path = "../data/GENKI-R2009a/Subsets/GENKI-4K/GENKI-4K_Images_Reduced.txt"
    # labels_path = "../data/GENKI-R2009a/Subsets/GENKI-4K/GENKI-4K_Labels_Reduced.txt"
    # img_path2 = '../data/groupdataset_release/images'
    # faces_path = '../data/groupdataset_release/faces'

    # train_again = False
    # if train_again:
    # 	svm = EmotionSVM(img_path, labels_path, img_path2, 'sad', dump=True)
    # 	svm.train()
    # 	# svm = train_smile_extractor(img_path, labels_path)
    # 	# joblib.dump(svm, 'svm_model.pkl')
    # else:
    # 	pass
    # 	# print 'Loading svm...'
    # 	# svm = EmotionSVM(img_path, labels_path, img_path2, 'smile', fit=False)
    # 	# all_face_features = get_all_face_features(img_path2, faces_path, svm)
    # 	# print all_face_features.shape
    # 	# np.save('../data/groupdataset_release/face_features.npy', all_face_features)

    # poselet_path = '../data/groupdataset_release/all_poseletes_hq'
    # all_poselet_features = get_all_poselet_features(poselet_path)
    # print all_poselet_features.shape
    # np.save('../data/groupdataset_release/poselet_features.npy', all_poselet_features)

    # basepath = '../data/groupdataset_release/annotations/all'
    # img_names = os.listdir(basepath)
    # svm = joblib.load('./svm_models/svm_orient_model.pkl')
    # X = get_image_orientation_features(svm)

    print("Extracting features...")
    X = construct_full_feature_matrix(only_poselet=True)
    Y = get_label_matrix('../data/groupdataset_release/image_annotations.csv')

    # binary = True
    # if binary:
    # 	Y[Y == 1] = 0
    # 	Y[Y == 2] = 0
    # 	Y[Y == 3] = 1
    # 	Y[Y == 4] = 1

    print("Splitting into train and test set...")
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    # class_names = {'none': 1, 'low': 2, 'moderate': 3, 'high': 4}
    class_names = {'no': 1, 'yes': 2}
    sentiments = ['interaction', 'focus', 'happiness', 'activity']

    # joblib.dump does not create missing directories, so make sure the
    # destination exists before the first model is written.
    model_dir = './final_svm_models'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    for i in range(Y_train.shape[1]):
        print("Fitting svm....")
        # svm_model = svm.LinearSVC(C=0.1)
        svm_model = svm.SVC(C=0.1,
                            kernel="linear",
                            decision_function_shape='ovr',
                            verbose=True)
        svm_model.fit(X_train, Y_train[:, i])

        # One pickle per sentiment, named after the label column.
        joblib.dump(svm_model,
                    './final_svm_models/svm_%s_model.pkl' % sentiments[i])
        print("Predicting...")
        y_predict_train = svm_model.predict(X_train)
        y_predict = svm_model.predict(X_test)

        analysis.run_analyses(y_predict_train, Y_train[:, i], y_predict,
                              Y_test[:, i], class_names, sentiments[i])
コード例 #5
0
# Three specs indexed by 'eng_TAMsimp': clause arguments, mother verb type and
# verb lexeme.  Query strings are reproduced byte-for-byte.
# NOTE(review): several mother_verbtype queries have no space before `and`
# (e.g. '"PAST"and ...') -- confirm that this is intended.
run_analyses(
    [
        dict(name='args', df=both_df, index='eng_TAMsimp',
             columns='cl_args', examples=[]),
        dict(
            name='mo_verbtype',
            df=both_df,
            index='eng_TAMsimp',
            columns='mother_verbtype',
            # Every example shows the same two text columns; only the query
            # differs, so the dicts are built from the list of queries.
            examples=[
                dict(query=q, bhs_text=['mother_clause_atom', 'clause_atom'])
                for q in (
                    ('eng_TAMsimp == "PAST"'
                     'and mother_verbtype == "wayq"'),
                    ('eng_TAMsimp == "PAST"'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp == "PRES PERF"'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp.isin(["PAST PERF", "PAST ~ PAST PERF"])'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp.isin(["PAST PERF", "PAST ~ PAST PERF"])'
                     'and mother_verbtype == "wayq"'),
                    ('eng_TAMsimp == "PRES"'
                     'and mother_verbtype == "yqtl"'),
                    ('eng_TAMsimp == "PRES"'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp == "PRES"'
                     'and mother_verbtype == "ptcp"'),
                    ('eng_TAMsimp == "PRES"'
                     'and mother_verbtype == "wayq"'),
                    ('eng_TAMsimp == "PAST ~ PAST PROG"'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp == "PAST ~ PAST PROG"'
                     'and mother_verbtype == "ptcp"'),
                    ('eng_TAMsimp == "PAST ~ PAST PROG"'
                     'and mother_verbtype == "wqtl"'),
                    ('eng_TAMsimp == "FUT ~ PRES"'
                     'and mother_verbtype == "qtl"'),
                    ('eng_TAMsimp == "FUT ~ PAST"'
                     'and mother_verbtype == "wayq"'),
                    ('eng_TAMsimp == "PAST ~ PRES PART" '
                     'and mother_verbtype == "infa"'),
                    ('eng_TAMsimp == "PRES PART" '
                     'and mother_verbtype == "qtl"'),
                )
            ],
        ),
        dict(
            name='verb_lex',
            df=both_df,
            index='eng_TAMsimp',
            columns='lex',
            examples=[
                dict(query=('eng_TAMsimp == "PAST" '
                            'and lex == "אמר"')),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and lex == "בוא"')),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and lex == "עשׂה"')),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and lex == "עלה"')),
                dict(query=('eng_TAMsimp == "PRES PERF" '
                            'and lex == "מלט"')),
                dict(query=('eng_TAMsimp == "PRES PERF" '
                            'and lex == "מאס"')),
                dict(query=('eng_TAMsimp == "PRES PERF" '
                            'and lex == "היה"')),
                dict(query=('eng_TAMsimp == "PRES PERF" '
                            'and lex == "נתן"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PERF" '
                            'and lex == "לקח"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PERF" '
                            'and lex == "נתן"')),
                # The only lexeme example that also sets 'spread'.
                dict(query=('eng_TAMsimp == "PRES" '
                            'and lex.isin(["ישׁב", "ירא", "ידע", "מלא"])'),
                     spread=35),
                dict(query=('eng_TAMsimp == "PRES" '
                            'and lex == "אמר"')),
                dict(query=('eng_TAMsimp == "PAST ~ PRES PART" '
                            'and lex == "חוה"')),
                dict(query=('eng_TAMsimp == "PAST ~ PRES PART" '
                            'and lex == "זעק"')),
                dict(query=('eng_TAMsimp == "PAST ~ PRES PART" '
                            'and lex == "שׁבר"')),
                dict(query=('eng_TAMsimp == "PAST ~ PRES PART" '
                            'and lex == "יעץ"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PROG" '
                            'and lex == "הלך"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PROG" '
                            'and lex == "ישׁב"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PROG" '
                            'and lex == "אכל"')),
                dict(query=('eng_TAMsimp == "PAST ~ PAST PROG" '
                            'and lex == "היה"')),
                dict(query=('eng_TAMsimp == "PAST ~ TO INF" '
                            'and lex == "חוה"')),
                dict(query=('eng_TAMsimp == "PAST ~ TO INF" '
                            'and lex == "ישׁב"')),
                dict(query=('eng_TAMsimp == "PAST ~ TO INF" '
                            'and lex == "שׁתה"')),
                dict(query=('eng_TAMsimp == "PAST ~ TO INF" '
                            'and lex == "חטא"')),
                dict(query=('eng_TAMsimp == "PAST ~ TO INF" '
                            'and lex == "כלה"')),
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #6
0
# Tense-distribution specs for the combined, ESV-only and NIV-only frames,
# plus the frame of simplified-tag disagreements.
run_analyses(
    [
        dict(name='eng_tenses', df=eng_df, index='eng_TAMsimp', examples=[]),
        dict(name='esv_tenses', df=esv_df, index='esv_TAM',
             examples=[dict(query='esv_TAM == "MOD"')]),
        # NOTE(review): indexed by 'niv_TAM' while the example query filters
        # on 'niv_TAMsimp' -- confirm that both columns are intended.
        dict(name='niv_tenses', df=niv_df, index='niv_TAM',
             examples=[dict(query='niv_TAMsimp == "IMPV"')]),
        dict(
            name='eng_simp_disagree',
            df=disag_df_simp,
            index='eng_TAMsimp',
            # Each example selects rows where either translation carries the
            # given tag; all queries share one template.
            examples=[
                dict(query='esv_TAM == "{0}" or niv_TAM == "{0}"'.format(tag))
                for tag in (
                    'FUT',
                    'MOD let',
                    'MOD may',
                    'PRES',
                    'MOD would',
                    'MOD shall',
                    'PAST',
                    'TO INF',
                    'MOD must',
                    'PRES PART',
                    'MOD could',
                    'MOD might',
                    'MOD can',
                    'IMPV do not',
                    'IMPV',
                )
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #7
0
# Clause-type and clause-argument specs indexed by ('eng_TAMsimp', 'person'),
# each run once on the full frame and once restricted to clause_rela == 'Main'.
run_analyses(
    [
        dict(
            name='clause_type',
            df=both_df,
            index=['eng_TAMsimp', 'person'],
            columns='clause_type',
            examples=[
                dict(query=('eng_TAMsimp.isin(["FUT", "FUT ~ MOD shall"]) '
                            'and person == "p3" '
                            'and clause_type.isin(["xYqX"])')),
            ],
        ),
        dict(
            name='cltype_maincl',
            df=both_df[both_df.clause_rela == 'Main'],
            index=['eng_TAMsimp', 'person'],
            columns='clause_type',
            examples=[],
        ),
        dict(
            name='args',
            df=both_df,
            index=['eng_TAMsimp', 'person'],
            columns='cl_args',
            examples=[
                dict(query=('eng_TAMsimp.isin(["FUT", "FUT ~ MOD shall"]) '
                            'and person == "p3" '
                            'and cl_args.isin(["_W_SV", "SV"]) ')),
            ],
        ),
        dict(
            name='args_maincl',
            df=both_df[both_df.clause_rela == 'Main'],
            index=['eng_TAMsimp', 'person'],
            columns='cl_args',
            examples=[],
        ),

        # Disabled specs, kept as in the original:
        #
        #    {
        #        'name': 'main_clause_type',
        #        'df': both_df[both_df.clause_rela == 'Main'],
        #        'index': 'eng_TAMsimp',
        #        'columns': 'clause_type',
        #    },
        #    {
        #        'name': 'clause_rela',
        #        'df': both_df,
        #        'index': 'eng_TAMsimp',
        #        'columns': 'clause_rela',
        #    },
        #    {
        #        'name': 'cltype_simp',
        #        'df': both_df,
        #        'index': 'eng_TAMsimp',
        #        'columns': 'cltype_simp',
        #    },
        #    {
        #        'name': 'rela_cltypesimp',
        #        'df': both_df,
        #        'index': 'eng_TAMsimp',
        #        'columns': ['clause_rela', 'cltype_simp'],
        #    },
        #    {
        #        'name': 'prec_part',
        #        'df': both_df,
        #        'index': 'eng_TAMsimp',
        #        'columns': 'prec_part',
        #        'examples': [
        #        ],
        #    },
    ],
    snakemake.output.dir,
)
コード例 #8
0
# has_objc / has_loca / has_time specs indexed by 'eng_TAMsimp'; every example
# query selects rows of one tense where the flag equals 1.
run_analyses(
    [
        dict(
            name='has_objc',
            df=both_df,
            index='eng_TAMsimp',
            columns='has_objc',
            examples=[
                dict(query=('eng_TAMsimp == "PRES" '
                            'and has_objc == 1')),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and has_objc == 1')),
                dict(query=('eng_TAMsimp == "FUT" '
                            'and has_objc == 1')),
            ],
        ),
        dict(
            name='has_loca',
            df=both_df,
            index='eng_TAMsimp',
            columns=['has_loca'],
            examples=[
                dict(query=('eng_TAMsimp == "PRES" '
                            'and has_loca == 1')),
                dict(query=('eng_TAMsimp == "PAST PROG" '
                            'and has_loca == 1')),
            ],
        ),
        dict(
            name='has_time',
            df=both_df,
            index='eng_TAMsimp',
            columns=['has_time'],
            examples=[
                dict(query=('eng_TAMsimp == "FUT" '
                            'and has_time == 1')),
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #9
0
ファイル: main.py プロジェクト: zeshanmh/VisualSentiment
def main():
	"""Train, persist and evaluate one linear SVM per sentiment label.

	Builds the feature matrix with ``construct_full_feature_matrix``
	(``only_poselet=True``), loads labels from ``image_annotations.csv``,
	performs an 80/20 train/test split, then for every label column fits an
	``svm.SVC``, dumps it under ``./final_svm_models`` and passes the train
	and test predictions to ``analysis.run_analyses``.

	The commented-out sections appear to be earlier one-off preprocessing
	steps; they are retained unchanged for reference.
	"""
	import os

	# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
	# replacement and fall back for old installations.
	try:
		from sklearn.model_selection import train_test_split
	except ImportError:
		from sklearn.cross_validation import train_test_split

	##Torso Extraction##
	# img_path = "../data/groupdataset_release/images/4940922642_5dab04b030_o.jpg"
	# torso_extractor = TorsoExtractor()
	# torso_list, image = torso_extractor.detect_torsos(img_path)

	# ## Face Extraction ##
	# # img_path = "../data/groupdataset_release/images/Library3.jpg"
	# # face_extractor = FaceExtractor()
	# # faces_lists, image = face_extractor.detect_faces(img_path)
	# # for face_list in faces_lists:
	# # 	for (x,y,w,h) in face_list:

	# extract_faces = False
	# extract_missed_faces = False
	# if extract_faces:
	# 	src_path = '../data/GENKI-R2009a/Subsets/GENKI-4K/files'
	# 	dest_path = './cache/GENKI_faces'
	# 	image_util.extract_GENKI_faces(src_path, dest_path)

	# if extract_missed_faces:
	# 	src_path = '../data/GENKI-R2009a/Subsets/GENKI-4K/files'
	# 	dest_path = './cache/GENKI_faces/GENKI_faces_looser_bounds'
	# 	image_util.extract_missed_faces(dest_path)

	#SVM Training
	# img_path = "../data/GENKI-R2009a/Subsets/GENKI-4K/GENKI-4K_Images_Reduced.txt"
	# labels_path = "../data/GENKI-R2009a/Subsets/GENKI-4K/GENKI-4K_Labels_Reduced.txt"
	# img_path2 = '../data/groupdataset_release/images'
	# faces_path = '../data/groupdataset_release/faces'

	# train_again = False
	# if train_again:
	# 	svm = EmotionSVM(img_path, labels_path, img_path2, 'sad', dump=True)
	# 	svm.train()
	# 	# svm = train_smile_extractor(img_path, labels_path)
	# 	# joblib.dump(svm, 'svm_model.pkl')
	# else:
	# 	pass
	# 	# print 'Loading svm...'
	# 	# svm = EmotionSVM(img_path, labels_path, img_path2, 'smile', fit=False)
	# 	# all_face_features = get_all_face_features(img_path2, faces_path, svm)
	# 	# print all_face_features.shape
	# 	# np.save('../data/groupdataset_release/face_features.npy', all_face_features)

	# poselet_path = '../data/groupdataset_release/all_poseletes_hq'
	# all_poselet_features = get_all_poselet_features(poselet_path)
	# print all_poselet_features.shape
	# np.save('../data/groupdataset_release/poselet_features.npy', all_poselet_features)

	# basepath = '../data/groupdataset_release/annotations/all'
	# img_names = os.listdir(basepath)
	# svm = joblib.load('./svm_models/svm_orient_model.pkl')
	# X = get_image_orientation_features(svm)

	print("Extracting features...")
	X = construct_full_feature_matrix(only_poselet=True)
	Y = get_label_matrix('../data/groupdataset_release/image_annotations.csv')

	# binary = True
	# if binary:
	# 	Y[Y == 1] = 0
	# 	Y[Y == 2] = 0
	# 	Y[Y == 3] = 1
	# 	Y[Y == 4] = 1

	print("Splitting into train and test set...")
	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

	# class_names = {'none': 1, 'low': 2, 'moderate': 3, 'high': 4}
	class_names = {'no': 1, 'yes': 2}
	sentiments = ['interaction', 'focus', 'happiness', 'activity']

	# joblib.dump does not create missing directories, so make sure the
	# destination exists before the first model is written.
	model_dir = './final_svm_models'
	if not os.path.isdir(model_dir):
		os.makedirs(model_dir)

	for i in range(Y_train.shape[1]):
		print("Fitting svm....")
		# svm_model = svm.LinearSVC(C=0.1)
		svm_model = svm.SVC(C=0.1, kernel="linear", decision_function_shape='ovr', verbose=True)
		svm_model.fit(X_train, Y_train[:,i])

		# One pickle per sentiment, named after the label column.
		joblib.dump(svm_model, './final_svm_models/svm_%s_model.pkl'%sentiments[i])
		print("Predicting...")
		y_predict_train = svm_model.predict(X_train)
		y_predict = svm_model.predict(X_test)

		analysis.run_analyses(y_predict_train, Y_train[:,i], y_predict, Y_test[:,i], class_names, sentiments[i])
コード例 #10
0
# ESV-only specs: every entry uses esv_df indexed by 'esv_TAM' and differs
# only in the columns analysed (and whether an empty 'examples' list is set).
run_analyses(
    [
        dict(name='clause_type', df=esv_df, index='esv_TAM',
             columns='clause_type'),
        dict(name='clause_rela', df=esv_df, index='esv_TAM',
             columns='clause_rela'),
        dict(name='cltype_simp', df=esv_df, index='esv_TAM',
             columns='cltype_simp'),
        dict(name='rela_cltypesimp', df=esv_df, index='esv_TAM',
             columns=['clause_rela', 'cltype_simp']),
        dict(name='args', df=esv_df, index='esv_TAM',
             columns='cl_args', examples=[]),
        dict(name='rela_particle', df=esv_df, index='esv_TAM',
             columns=['clause_rela', 'prec_part']),
        dict(name='prec_part', df=esv_df, index='esv_TAM',
             columns='prec_part', examples=[]),

        # Disabled specs, kept as in the original:
        #
        #    {
        #        'name': 'prec_part_gendom',
        #        'df': esv_df,
        #        'index': 'prec_part',
        #        'columns': ['genre', 'domain2', 'eng_TAM'],
        #    },
        #
        #    {
        #        'name': 'args_mother',
        #        'df': esv_df,
        #        'index': 'eng_TAM',
        #        'columns': ['cl_args','mother_verbtype'],
        #        'examples': [],
        #    },
        #    {
        #        'name': 'has_objc',
        #        'df': esv_df,
        #        'index': 'eng_TAM',
        #        'columns': 'has_objc',
        #    },
        #    {
        #        'name': 'has_loca',
        #        'df': esv_df,
        #        'index': 'eng_TAM',
        #        'columns': ['clause_rela', 'has_loca'],
        #        'examples': [
        #        ],
        #    },
        #    {
        #        'name': 'has_time',
        #        'df': esv_df,
        #        'index': 'eng_TAM',
        #        'columns': ['clause_rela', 'has_time'],
        #        'examples': [
        #        ],
        #    },
    ],
    snakemake.output.dir,
)
コード例 #11
0
# has_objc / has_loca / has_time specs indexed by 'eng_TAMsimp', with example
# queries that select rows where the flag equals 1.
run_analyses(
    [
        dict(
            name='has_objc',
            df=both_df,
            index='eng_TAMsimp',
            columns='has_objc',
            examples=[
                dict(query=('eng_TAMsimp == "FUT" '
                            'and has_objc == 1 ')),
            ],
        ),
        dict(
            name='has_loca',
            df=both_df,
            index='eng_TAMsimp',
            columns=['has_loca'],
            examples=[
                dict(query=('eng_TAMsimp == "FUT ~ MOD shall" '
                            'and has_loca == 1 '),
                     spread=10),
            ],
        ),
        dict(
            name='has_time',
            df=both_df,
            index='eng_TAMsimp',
            columns=['has_time'],
            examples=[
                dict(query=('eng_TAMsimp.isin(["FUT", "FUT ~ MOD shall"]) '
                            'and has_time == 1 ')),
                dict(query=('eng_TAMsimp == "MOD is to ~ MOD shall" '
                            'and has_time == 1 ')),
                dict(query=('eng_TAMsimp == "IMPV ~ MOD shall" '
                            'and has_time == 1 ')),
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #12
0
# Clause-level specs indexed by 'eng_TAMsimp': clause type (full frame and
# clause_rela == 'Main' only), clause relation, simplified clause type,
# clause arguments and mother verb type.
run_analyses(
    [
        dict(
            name='clause_type',
            df=both_df,
            index='eng_TAMsimp',
            columns='clause_type',
            examples=[
                dict(query=('eng_TAMsimp == "FUT ~ MOD shall" '
                            'and clause_type == "WQtX"')),
                dict(query=('eng_TAMsimp == "MOD shall" '
                            'and clause_type == "WQtX"')),
            ],
        ),
        dict(name='main_clause_type',
             df=both_df[both_df.clause_rela == 'Main'],
             index='eng_TAMsimp', columns='clause_type'),
        dict(name='clause_rela', df=both_df, index='eng_TAMsimp',
             columns='clause_rela'),
        dict(name='cltype_simp', df=both_df, index='eng_TAMsimp',
             columns='cltype_simp'),
        dict(name='rela_cltypesimp', df=both_df, index='eng_TAMsimp',
             columns=['clause_rela', 'cltype_simp']),
        dict(name='args', df=both_df, index='eng_TAMsimp',
             columns='cl_args', examples=[]),
        dict(
            name='mo_verbtype',
            df=both_df,
            index='eng_TAMsimp',
            columns='mother_verbtype',
            examples=[
                dict(query=('eng_TAMsimp.isin(["FUT", "FUT ~ MOD shall"]) '
                            'and mother_verbtype == "ptcp"'),
                     bhs_text=['mother_clause_atom', 'clause_atom']),
                dict(query=('eng_TAMsimp == "PRES" '
                            'and mother_verbtype == "yqtl"'),
                     bhs_text=['mother_clause_atom', 'clause_atom']),
                dict(query=('eng_TAMsimp == "IMPV" '
                            'and mother_verbtype == "impv"'),
                     bhs_text=['mother_clause_atom', 'clause_atom']),
                # The remaining examples use 'mother_intertext' instead of
                # 'mother_clause_atom' as the first text column.
                dict(query=('eng_TAMsimp == "MOD must ~ MOD shall" '
                            'and mother_verbtype == "Ø"'),
                     bhs_text=['mother_intertext', 'clause_atom']),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and mother_verbtype == "wayq"'),
                     bhs_text=['mother_intertext', 'clause_atom'],
                     spread=35),
                dict(query=('eng_TAMsimp == "PAST" '
                            'and mother_verbtype == "qtl"'),
                     bhs_text=['mother_intertext', 'clause_atom'],
                     spread=35),
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #13
0
# Tense-distribution specs for the combined, ESV-only and NIV-only frames,
# plus the frame of simplified-tag disagreements.
run_analyses(
    [
        dict(
            name='eng_tenses',
            df=eng_df,
            index='eng_TAMsimp',
            # Five exact-match queries followed by one regex match over
            # several modal tags.
            examples=[
                dict(query='eng_TAMsimp == "%s"' % tag)
                for tag in (
                    'PRES PART',
                    'PAST PERF',
                    'PRES PERF',
                    'PRES PERF PROG',
                    'TO INF',
                )
            ] + [
                dict(query=('eng_TAMsimp.str.match('
                            '"MOD may|MOD was to|MOD is to|MOD let|MOD must")')),
            ],
        ),
        dict(name='esv_tenses', df=esv_df, index='esv_TAM', examples=[]),
        dict(name='niv_tenses', df=niv_df, index='niv_TAMsimp'),
        dict(
            name='eng_simp_disagree',
            df=disag_df_simp,
            index='eng_TAMsimp',
            examples=[
                dict(query='eng_TAMsimp == "%s"' % tag)
                for tag in (
                    'PRES ~ PRES PART',
                    'PRES ~ PRES PROG',
                    'PAST ~ PAST PROG',
                    'PAST ~ PRES PART',
                    'PAST ~ PRES',
                    'FUT ~ PRES',
                )
            ],
        ),
    ],
    snakemake.output.dir,
)
コード例 #14
0
# Genre, domain and period specs on both_df, all indexed by 'eng_TAMsimp'.
# The two period specs carry no 'examples' key.
run_analyses(
    [
        dict(name='genre', df=both_df, index='eng_TAMsimp',
             columns='genre', examples=[]),
        dict(name='domain', df=both_df, index='eng_TAMsimp',
             columns='domain2', examples=[]),
        dict(name='gendom', df=both_df, index='eng_TAMsimp',
             columns=['genre', 'domain2'], examples=[]),
        dict(name='period', df=both_df, index='eng_TAMsimp',
             columns='period'),
        dict(name='period_gendom', df=both_df, index='eng_TAMsimp',
             columns=['period', 'genre', 'domain2']),
    ],
    snakemake.output.dir,
)
コード例 #15
0
# Example queries for the 'verb_stem' spec (trailing spaces inside the query
# strings are part of the original text and are kept).
stem_queries = [
    'eng_TAM == "PRES..IND" and stem == "nif" ',
    'eng_TAM == "PRES..IND" and stem == "qal" ',
]

# Verb-level specs: all of them read `eng_df` and index on `eng_TAM`.
verb_specs = [
    {
        'name': 'verb_stem',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'stem',
        'examples': [{'query': q} for q in stem_queries],
    },
    {
        'name': 'verb_lexst_ps',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': ['lex', 'stem', 'person'],
        'fishers': False,
    },
    {
        'name': 'verb_person',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'person',
        'examples': [
            {
                'query': 'eng_TAM == "PRES..IND" and person == "p1" ',
                'spread': 10,
            },
        ],
    },
    {
        'name': 'is_stative',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': ['esv_is', 'niv_is'],
    },
]

run_analyses(verb_specs, snakemake.output.dir)
コード例 #16
0
# Example queries for the 'args' spec: a simplified TAM tag paired with a
# `cl_args` value.
args_queries = [
    'eng_TAMsimp == "PRES" and cl_args == "V"',
    'eng_TAMsimp == "PRES" and cl_args == "RV"',
    'eng_TAMsimp == "PRES" and cl_args == "QSV"',
    'eng_TAMsimp == "PAST" and cl_args == "RV"',
    'eng_TAMsimp == "PAST" and cl_args == "_W_SV"',
    'eng_TAMsimp == "PAST" and cl_args == "V"',
    'eng_TAMsimp == "PRES PART" and cl_args == "V"',
    'eng_TAMsimp == "PRES PROG" and cl_args == "RSV"',
    'eng_TAMsimp == "PRES PROG" and cl_args == "ISV"',
    'eng_TAMsimp == "PAST PROG" and cl_args == "_W_SV"',
    'eng_TAMsimp == "PAST PROG" and cl_args == "SV"',
    'eng_TAMsimp == "PAST PROG" and cl_args == "RV"',
    'eng_TAMsimp == "FUT" and cl_args == "ISV"',
    'eng_TAMsimp == "FUT" and cl_args == "CV"',
    'eng_TAMsimp == "FUT" and cl_args == "ASV"',
]

# Example queries for the 'mo_verbtype' spec.
# NOTE(review): the first query has no space between "PRES" and `and`; the
# text is reproduced exactly as it was -- confirm whether that is intended.
mother_queries = [
    'eng_TAMsimp == "PRES"and mother_verbtype == "yqtl"',
    'eng_TAMsimp == "PAST" and mother_verbtype == "wayq"',
    'eng_TAMsimp == "PRES PART" and mother_verbtype == "wayq"',
    'eng_TAMsimp == "PRES PROG" and mother_verbtype == "infc"',
    'eng_TAMsimp == "PRES PROG" and mother_verbtype == "yqtl"',
    'eng_TAMsimp == "PRES PROG" and mother_verbtype == "wqtl"',
    'eng_TAMsimp == "PRES PROG" and mother_verbtype == "impv"',
    'eng_TAMsimp == "PAST PROG" and mother_verbtype == "wayq"',
    'eng_TAMsimp == "FUT" and mother_verbtype == "qtl"',
]

run_analyses([
    {
        'name': 'args',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'cl_args',
        'examples': [{'query': q} for q in args_queries],
    },
    {
        'name': 'mo_verbtype',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'mother_verbtype',
        # every example gets its own fresh 'bhs_text' list
        'examples': [
            {'query': q, 'bhs_text': ['mother_clause_atom', 'clause_atom']}
            for q in mother_queries
        ],
    },
    {
        'name': 'verb_lex',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'lex',
        'examples': [],
    },
], snakemake.output.dir)
コード例 #17
0
# Queries for the 'eng_simp_disagree' examples that all use spread 10.
disagree_queries = [
    'eng_TAMsimp == "PAST ~ PRES PART"',
    'eng_TAMsimp == "PAST ~ PAST PERF"',
    'eng_TAMsimp == "PAST ~ PRES PERF"',
    'eng_TAMsimp == "PAST ~ PRES"',
    'eng_TAMsimp == "PAST ~ TO INF"',
    'eng_TAMsimp == "PRES ~ PRES PERF"',
    'eng_TAMsimp == "PAST ~ PAST PROG"',
    'eng_TAMsimp == "FUT ~ PRES"',
    'eng_TAMsimp == "MOD could ~ PAST"',
    'eng_TAMsimp == "FUT ~ PAST"',
]

# The spread-10 examples come first, followed by one regex-based example
# with spread -1, in the same order as before.
disagree_examples = [{'query': q, 'spread': 10} for q in disagree_queries]
disagree_examples.append({
    'query': 'eng_TAMsimp.str.match(".*FUT")',
    'spread': -1,
})

run_analyses([
    {'name': 'eng_tenses', 'df': eng_df, 'index': 'eng_TAMsimp',
     'examples': []},
    {'name': 'esv_tenses', 'df': esv_df, 'index': 'esv_TAM',
     'examples': []},
    {'name': 'niv_tenses', 'df': niv_df, 'index': 'niv_TAMsimp',
     'examples': [{'query': 'niv_TAMsimp == "IMPV"'}]},
    {'name': 'eng_simp_disagree', 'df': disag_df_simp,
     'index': 'eng_TAMsimp', 'examples': disagree_examples},
], snakemake.output.dir)
コード例 #18
0
# Queries for the 'verb_lex' examples that use spread 10 (trailing spaces in
# the query strings are original and kept).
lex_spread_queries = [
    'eng_TAMsimp == "FUT" and lex_etcbc == "NTN[" ',
    'eng_TAMsimp == "FUT ~ MOD shall" and lex_etcbc == "MWT[" ',
    'eng_TAMsimp == "FUT ~ MOD shall" and lex_etcbc == "NPL[" ',
    'eng_TAMsimp == "IMPV ~ MOD shall" and lex_etcbc == "<FH[" ',
    'eng_TAMsimp == "MOD may" and lex_etcbc == ">KL[" ',
    'eng_TAMsimp == "IMPV" and lex_etcbc == "JR>[" ',
]

# The first example has no 'spread' key; the rest follow in original order.
lex_examples = [
    {'query': 'eng_TAMsimp == "FUT ~ MOD shall" and lex_etcbc == "HJH[" '},
]
lex_examples.extend({'query': q, 'spread': 10} for q in lex_spread_queries)

run_analyses([
    {
        'name': 'verb_lex',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': ['lex', 'stem'],
        'examples': lex_examples,
        'special': [],
    },
], snakemake.output.dir)
コード例 #19
0
# Per-translation tense counts: each spec indexes its own dataframe on a
# simplified TAM column.
tense_specs = [
    {'name': 'eng_tenses', 'df': eng_df, 'index': 'eng_TAMsimp',
     'examples': []},
    {'name': 'esv_tenses', 'df': esv_df, 'index': 'esv_TAMsimp'},
    {'name': 'niv_tenses', 'df': niv_df, 'index': 'niv_TAMsimp'},
    {'name': 'trans_tam', 'df': eng_df, 'index': 'esv_TAMsimp',
     'columns': 'niv_TAMsimp', 'fishers': False, 'examples': []},
]

# Specs built on `disag_df_simp`.
# NOTE(review): 'disag_gendom' indexes on 'eng_simp_agree' whereas the other
# two use 'eng_TAMsimp' -- kept as-is; confirm this is intended.
disagreement_specs = [
    {'name': 'disag_genre', 'df': disag_df_simp, 'index': 'eng_TAMsimp',
     'columns': 'genre'},
    {'name': 'disag_domain', 'df': disag_df_simp, 'index': 'eng_TAMsimp',
     'columns': 'domain2'},
    {'name': 'disag_gendom', 'df': disag_df_simp, 'index': 'eng_simp_agree',
     'columns': ['genre', 'domain2']},
]

run_analyses(tense_specs + disagreement_specs, snakemake.output.dir)
コード例 #20
0
# 'gendom' example queries, grouped by the 'spread' they are given.
# Trailing spaces inside query strings are original and kept.
gendom_plain_queries = [
    ('eng_TAMsimp.isin(["FUT ~ MOD shall", "FUT"]) '
     'and genre == "prophetic" and domain2 == "Q"'),
    ('eng_TAMsimp.isin(["FUT ~ MOD shall", "FUT"]) '
     'and genre == "prose" and domain2 == "Q"'),
]
gendom_spread10_queries = [
    'eng_TAMsimp == "IMPV" and genre == "prose" and domain2 == "Q"',
    'eng_TAMsimp == "MOD let" and genre == "prose" and domain2 == "Q"',
    'eng_TAMsimp == "MOD may" and genre == "prose" and domain2 == "Q"',
    'eng_TAMsimp == "IMPV" and genre == "poetry" and domain2 == "Q"',
    'eng_TAMsimp == "MOD can" and genre == "poetry" and domain2 == "Q"',
    'eng_TAMsimp == "IMPV ~ MOD shall" and genre == "instruction" ',
    'eng_TAMsimp == "MOD must ~ MOD shall" and genre == "instruction" ',
]
gendom_spread35_queries = [
    ('eng_TAMsimp == "PAST" and domain2.isin(["D", "N"]) '
     'and genre == "prose" '),
    'eng_TAMsimp == "PAST" and genre == "poetry" ',
]

# Concatenated in the same order the examples were originally listed.
gendom_examples = (
    [{'query': q} for q in gendom_plain_queries]
    + [{'query': q, 'spread': 10} for q in gendom_spread10_queries]
    + [{'query': q, 'spread': 35} for q in gendom_spread35_queries]
)

run_analyses([
    {
        'name': 'genre',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'genre',
        'examples': [
            {'query': 'eng_TAMsimp == "PRES" and genre == "poetry" '},
        ],
    },
    {'name': 'domain', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'domain2'},
    {
        'name': 'gendom',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': ['genre', 'domain2'],
        'examples': gendom_examples,
    },
    {'name': 'ps_gendom', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': ['genre', 'domain2', 'person']},
    {'name': 'period', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'period'},
    {'name': 'period_gendom', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': ['period', 'genre', 'domain2']},
], snakemake.output.dir)
コード例 #21
0
import sys
# NB snakemake runs script from /workflow directory
sys.path.append('scripts/analysis')
from load_dfs import DfLoader
from analysis import run_analyses

# Build the loader from the Snakemake input directory and fetch the
# dataframe returned by `eng_both()`.
DfLoad = DfLoader(snakemake.input.data_dir)
both_df = DfLoad.eng_both()

# Feature values intended for selections; not referenced by the analysis
# call below.
main_genre = ['prose', 'poetry', 'prophetic']
main_dom = ['Q', 'N']

# Single spec: `cl_args` columns against the simplified TAM index, with an
# explicitly empty example list.
args_spec = {
    'name': 'args',
    'df': both_df,
    'index': 'eng_TAMsimp',
    'columns': 'cl_args',
    'examples': [],
}

run_analyses([args_spec], snakemake.output.dir)

コード例 #22
0
import sys
import pandas as pd

# NB snakemake runs script from /workflow directory
sys.path.append('scripts/analysis')
from load_dfs import DfLoader
from analysis import run_analyses

# Build the loader from the Snakemake input directory and fetch the
# dataframe returned by `eng_both()`.
DfLoad = DfLoader(snakemake.input.data_dir)
both_df = DfLoad.eng_both()

# Single spec: lexeme and stem columns against the simplified TAM index,
# with an explicitly empty example list.
verb_lex_spec = {
    'name': 'verb_lex',
    'df': both_df,
    'index': 'eng_TAMsimp',
    'columns': ['lex', 'stem'],
    'examples': [],
}

run_analyses([verb_lex_spec], snakemake.output.dir)
コード例 #23
0
# Example queries for the 'verb_person' spec (trailing spaces are original).
person_queries = [
    'eng_TAMsimp == "FUT" and person == "p1" ',
    'eng_TAMsimp == "FUT ~ MOD shall" and person == "p1" ',
]

# Verb-level specs on `both_df`, indexed by the simplified TAM tag.
verb_specs = [
    {'name': 'verb_stem', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'stem'},
    # Disabled spec, kept for reference:
    # {'name': 'verb_lexst_ps', 'df': both_df, 'index': 'eng_TAMsimp',
    #  'columns': ['lex', 'stem', 'person'], 'fishers': False},
    {
        'name': 'verb_person',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'person',
        'examples': [{'query': q} for q in person_queries],
    },
    {'name': 'is_stative', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': ['esv_is', 'niv_is']},
]

run_analyses(verb_specs, snakemake.output.dir)
コード例 #24
0
# Row subsets restricted to domain2 values 'N' and 'Q', used by the
# 'disag_domain' and 'disag_gendom' specs respectively.
disag_nq_df = disag_df_simp[disag_df_simp.domain2.isin(['N', 'Q'])]
eng_nq_df = eng_df[eng_df.domain2.isin(['N', 'Q'])]

# Examples attached to 'eng_tenses'; both query the ESV column.
eng_tense_examples = [
    {'query': 'esv_TAMsimp == "MOD would"', 'spread': 35},
    {'query': 'esv_TAMsimp == "HAB used to"', 'spread': 35},
]

run_analyses([
    {'name': 'eng_tenses', 'df': eng_df, 'index': 'eng_TAMsimp',
     'examples': eng_tense_examples},
    {'name': 'esv_tenses', 'df': esv_df, 'index': 'esv_TAMsimp'},
    {'name': 'niv_tenses', 'df': niv_df, 'index': 'niv_TAMsimp'},
    # 'special' passes the `sum_top_values` callable under the 'do' key
    {'name': 'both_tenses', 'df': eng_df, 'index': 'eng_TAMsimp',
     'special': [{'df': 'count', 'do': sum_top_values}]},
    {'name': 'eng_simp_agree', 'df': agg_df, 'index': 'eng_simp_agree'},
    {'name': 'eng_simp_disagree', 'df': disag_df_simp,
     'index': 'eng_TAMsimp'},
    {'name': 'trans_tam', 'df': eng_df, 'index': 'esv_TAMsimp',
     'columns': 'niv_TAMsimp', 'fishers': False, 'examples': []},
    {'name': 'both_genre', 'df': eng_df, 'index': 'eng_TAMsimp',
     'columns': 'genre'},
    {'name': 'both_domain', 'df': eng_df, 'index': 'eng_TAMsimp',
     'columns': 'domain2'},
    {'name': 'both_gendom', 'df': eng_df, 'index': 'eng_TAMsimp',
     'columns': ['genre', 'domain2']},
    {'name': 'disag_genre', 'df': disag_df_simp, 'index': 'eng_TAMsimp',
     'columns': 'genre'},
    {'name': 'disag_domain', 'df': disag_nq_df, 'index': 'eng_TAMsimp',
     'columns': 'domain2'},
    {'name': 'disag_gendom', 'df': eng_nq_df, 'index': 'eng_simp_agree',
     'columns': ['genre', 'domain2']},
    {'name': 'inter_gendom', 'df': disag_df_simp, 'index': 'eng_TAMsimp',
     'columns': ['genre', 'domain2'], 'examples': []},
], snakemake.output.dir)
コード例 #25
0
# Clause-level specs: all use `both_df` indexed by `eng_TAMsimp`; only the
# `columns` field and the presence of an empty 'examples' list differ.
clause_specs = [
    {'name': 'clause_type', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'clause_type', 'examples': []},
    {'name': 'clause_rela', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'clause_rela'},
    {'name': 'cltype_simp', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'cltype_simp'},
    {'name': 'args', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'cl_args', 'examples': []},
    {'name': 'mo_verbtype', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'mother_verbtype', 'examples': []},
]

run_analyses(clause_specs, snakemake.output.dir)
コード例 #26
0
# Example queries pairing a simplified English TAM tag with a French tense.
french_queries = [
    'eng_TAMsimp == "PAST" and french_tense == "imparfait"',
    'eng_TAMsimp == "PAST" and french_tense == "passé_comp"',
    'eng_TAMsimp == "PAST" and french_tense == "passé_simp"',
    'eng_TAMsimp == "PAST ~ PRES" and french_tense == "imparfait"',
]

# Every example shares spread -1 and the same 'extra_text' mapping; a fresh
# mapping object is created per example.
french_examples = [
    {
        'query': q,
        'spread': -1,
        'extra_text': {'NBS': 'french', 'NBS (verse)': 'french_verse'},
    }
    for q in french_queries
]

run_analyses([
    {
        'name': 'french_tense',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': 'french_tense',
        'examples': french_examples,
    },
], snakemake.output.dir)
コード例 #27
0
# Specs over the full (non-simplified) TAM columns, passed to run_analyses
# together with the Snakemake output directory.
#
# The regex patterns given to `str.match` are raw strings: the previous
# plain literals contained `\.`, which is an invalid string escape and
# triggers a DeprecationWarning/SyntaxWarning. The pattern values are
# unchanged.
run_analyses([
    {
        'name': 'eng_tenses',
        'df': eng_df,
        'index': 'eng_TAM',
    },
    {
        'name': 'esv_tenses',
        'df': esv_df,
        'index': 'esv_TAM',
    },
    {
        'name': 'niv_tenses',
        'df': niv_df,
        'index': 'niv_TAM',
    },
    {
        'name': 'eng_agree',
        'df': both_df,
        'index': 'eng_agree',
    },
    {
        'name': 'eng_disagree',
        'df': disag_df,
        'index': 'eng_TAM',
    },
    {
        'name': 'trans_tam',
        'df': both_df,
        'index': 'esv_TAM',
        'columns': 'niv_TAM',
        'fishers': False,
    },
    {
        'name': 'disag_past',
        'df': disag_df[disag_df.eng_TAM.str.match(r'.*PAST\.\.IND')],
        'index': 'eng_TAM',
    },
    {
        'name': 'disag_pres_perf',
        'df': disag_df[disag_df.eng_TAM.str.match(r'.*PRES\.PERF\.IND')],
        'index': 'eng_TAM',
        'examples': [
            {
                'query': ('eng_TAM == "FUT..IND ~ PRES.PERF.IND"'),
            },
            {
                'query': ('eng_TAM == "PRES..IND ~ PRES.PERF.IND"'),
            },
        ],
    },
    {
        'name': 'disag_pres',
        'df': disag_df[disag_df.eng_TAM.str.match(r'.*PRES\.\.IND')],
        'index': 'eng_TAM',
        'examples': [
            {
                'query': ('eng_TAM == "FUT..IND ~ PRES..IND"'),
            },
        ],
    },
    {
        'name': 'disag_domain',
        'df': disag_df[disag_df.domain2.isin(['N', 'Q'])],
        'index': 'eng_TAM',
        'columns': 'domain2',
    },
    {
        'name': 'disag_gendom',
        'df': both_df[both_df.domain2.isin(['N', 'Q'])],
        'index': 'eng_agree',
        'columns': ['genre', 'domain2'],
    },
    {
        'name': 'inter_gendom',
        'df': disag_df,
        'index': 'eng_TAM',
        'columns': ['genre', 'domain2'],
        'examples': [
            {
                'query': ('eng_TAM == "PAST..IND ~ PRES.PERF.IND" '
                          'and genre == "prose" '
                          'and domain2 == "Q"'),
                'spread': 10,
            },
            {
                'query': ('eng_TAM == "PAST..IND ~ PAST.PERF.IND" '
                          'and genre == "prose" '
                          'and domain2 == "N"'),
                'spread': 10,
            },
            # NOTE(review): identical to the first example in this list;
            # kept because it is unclear whether run_analyses depends on
            # the repetition -- confirm and drop if redundant.
            {
                'query': ('eng_TAM == "PAST..IND ~ PRES.PERF.IND" '
                          'and genre == "prose" '
                          'and domain2 == "Q"'),
                'spread': 10,
            },
            {
                'query': ('eng_TAM == "PAST..IND ~ PRES..IND" '
                          'and genre.isin(["poetry", "prophetic"]) '
                          'and domain2 == "Q"'),
                'spread': 10,
            },
            {
                'query': ('eng_TAM == "PAST..IND ~ PRES..IND" '
                          'and genre == "instruction" '
                          'and domain2 == "Q"'),
                'spread': 2,
            },
        ],
    },
], snakemake.output.dir)
コード例 #28
0
# Example lists for the has_* specs (trailing spaces inside query strings
# are original and kept).
objc_examples = [
    {'query': 'eng_TAMsimp == "FUT ~ MOD shall" and has_objc == 0 '},
    {'query': 'eng_TAMsimp == "PRES" and has_objc == 0 '},
]
objc_person_examples = [
    {'query': 'eng_TAMsimp == "FUT" and person == "p1" and has_objc == 1'},
]
time_examples = [
    {'query': 'eng_TAMsimp == "PRES" and has_time == 0 '},
    {
        'query': 'eng_TAMsimp == "MOD is to ~ MOD shall" and has_time == 1',
        'spread': 10,
    },
]

run_analyses([
    {'name': 'has_objc', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'has_objc', 'examples': objc_examples},
    # the only spec here that uses a two-field index
    {'name': 'has_objc_person', 'df': both_df,
     'index': ['eng_TAMsimp', 'person'], 'columns': 'has_objc',
     'examples': objc_person_examples},
    {'name': 'has_loca', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': ['has_loca'], 'examples': []},
    {'name': 'has_time', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': ['has_time'], 'examples': time_examples},
], snakemake.output.dir)
コード例 #29
0
# Queries for the 'gendom' examples, in their original order. Every one of
# them is paired with spread 15 below.
gendom_queries = [
    'genre == "poetry" and eng_TAMsimp == "PRES"',
    'genre == "prophetic" and eng_TAMsimp == "PRES"',
    'genre == "prose" and domain2 == "Q" and eng_TAMsimp == "PRES"',
    'genre == "prose" and domain2 == "N" and eng_TAMsimp == "PAST"',
    'genre == "prose" and domain2 == "Q" and eng_TAMsimp == "PAST"',
    'genre == "prophetic" and eng_TAMsimp == "PAST"',
    'genre == "poetry" and eng_TAMsimp == "PAST"',
    'genre == "prose" and domain2 == "N" and eng_TAMsimp == "PRES PART"',
    'genre == "prose" and domain2 == "Q" and eng_TAMsimp == "PRES PART"',
    'genre == "poetry" and eng_TAMsimp == "PRES PART"',
    'genre == "prophetic" and domain2 == "Q" and eng_TAMsimp == "PRES PART"',
    'genre == "prose" and domain2 == "Q" and eng_TAMsimp == "PRES PROG"',
    'genre == "instruction" and domain2 == "Q" and eng_TAMsimp == "PRES PROG"',
    'genre == "prophetic" and domain2 == "Q" and eng_TAMsimp == "PRES PROG"',
    'genre == "prose" and domain2 == "N" and eng_TAMsimp == "PAST PROG"',
    'genre == "prophetic" and eng_TAMsimp == "FUT"',
    'genre == "prose" and domain2 == "Q" and eng_TAMsimp == "FUT"',
    'genre == "poetry" and domain2 == "Q" and eng_TAMsimp == "FUT"',
]

run_analyses([
    {'name': 'genre', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'genre', 'examples': []},
    {'name': 'domain', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'domain2'},
    {
        'name': 'gendom',
        'df': both_df,
        'index': 'eng_TAMsimp',
        'columns': ['genre', 'domain2'],
        'examples': [{'query': q, 'spread': 15} for q in gendom_queries],
    },
    {'name': 'period', 'df': both_df, 'index': 'eng_TAMsimp',
     'columns': 'period'},
], snakemake.output.dir)
コード例 #30
0
run_analyses([
    {
        'name': 'clause_type',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'clause_type',
    },
    {
        'name': 'clause_rela',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'clause_rela',
    },
    {
        'name': 'clause_rela',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'clause_rela',
    },
    {
        'name': 'cltype_simp',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'cltype_simp',
    },
    {
        'name': 'rela_cltypesimp',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': ['clause_rela', 'cltype_simp'],
    },
    {
        'name': 'rela_particle',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': ['clause_rela', 'prec_part'],
    },
    {
        'name':
        'prec_part',
        'df':
        eng_df,
        'index':
        'eng_TAM',
        'columns':
        'prec_part',
        'examples': [
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and prec_part == "_KJ_" '),
            },
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and prec_part.str.match("^_>M_.*")'),
                'spread':
                10,
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and prec_part.str.match(".*_KH_")'),
                'spread':
                15,
            },
            {
                'query': ('eng_TAM == "PAST.PERF.IND" '
                          'and prec_part == "_>CR_"'),
                'spread': 25,
            },
            {
                'query': ('eng_TAM == "PAST.PERF.IND" '
                          'and clause_rela == "SubArg" '
                          'and prec_part == "_KJ_" '),
                'bhs_text': ['mother_clause_atom', 'clause_atom'],
            },
        ],
    },
    {
        'name': 'prec_part_gendom',
        'df': eng_df,
        'index': 'prec_part',
        'columns': ['genre', 'domain2', 'eng_TAM'],
    },
    {
        'name':
        'args',
        'df':
        eng_df,
        'index':
        'eng_TAM',
        'columns':
        'cl_args',
        'examples': [
            {
                'query': 'eng_TAM == "PAST..IND" and cl_args == "_W_SV"',
                'spread': 20,
            },
            {
                'query': 'eng_TAM == "PAST..IND" and cl_args == "_W_OV"',
                'spread': 20,
            },
            {
                'query': 'eng_TAM == "PAST..IND" and cl_args == "_W_AV"',
                'spread': 20,
            },
            {
                'query': 'eng_TAM == "PAST..IND" and cl_args == "SV"',
            },
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and cl_args.str.match("QV") ')
            },
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and cl_args.str.match("IV") ')
            },
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and cl_args.str.match("C[OS]?V") '
                          'and (~prec_part.str.match(".*_KJ_|.*_>M_")) '),
                'spread':
                10,
            },
            {
                'query': ('eng_TAM == "PRES.PERF.IND" '
                          'and cl_args == "V"')
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and lex_etcbc != ">MR[" '
                          'and cl_args == "V"')
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and lex_etcbc != ">MR[" '
                          'and cl_args == "AV"')
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and cl_args == "CAV"')
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and cl_args == "RSV"')
            },
            {
                'query': ('eng_TAM == "PRES..IND" '
                          'and cl_args == "_W_SAV"')
            },
        ]
    },
    {
        'name':
        'args_mother',
        'df':
        eng_df,
        'index':
        'eng_TAM',
        'columns': ['cl_args', 'mother_verbtype'],
        'examples': [
            {
                'query': ('eng_TAM == "PAST..IND" '
                          'and cl_args.str.match("_W_[OS]V") '
                          'and mother_verbtype == "wayq" '),
                'bhs_text': ['mother_clause', 'clause_atom']
            },
            {
                'query': ('eng_TAM == "PAST..IND" '
                          'and cl_args.str.match("_W_[OS]V") '
                          'and mother_verbtype == "wayq" '
                          'and mother_verb_lex == lex_etcbc '),
                'bhs_text': ['mother_clause', 'clause_atom'],
                'spread':
                10,
            },
            {
                'query': ('eng_TAM == "PAST..IND"'
                          'and cl_args.str.match("_W_[OS]V") '
                          'and mother_verbtype == "wayq" '
                          'and mother_verbplain == "יהי" '
                          'and mother_verb_lex == "HJH[" '),
                'bhs_text': ['mother_clause', 'clause_atom'],
                'spread':
                20,
            },
            {
                'query': ('eng_TAM == "PAST..IND"'
                          'and cl_args == "CV" '
                          'and mother_verbtype == "yqtl" '),
                'bhs_text': ['mother_clause', 'clause_atom'],
            },
            {
                'query': ('eng_TAM == "PAST..IND"'
                          'and cl_args == "RV" '
                          'and mother_verbtype == "yqtl" '),
                'bhs_text': ['mother_clause', 'clause_atom'],
            },
            {
                'query': ('eng_TAM == "PAST..IND"'
                          'and cl_args == "CV" '
                          'and mother_verbtype == "impv" '),
                'bhs_text': ['mother_clause', 'clause_atom'],
            },
        ],
    },
    {
        'name': 'has_objc',
        'df': eng_df,
        'index': 'eng_TAM',
        'columns': 'has_objc',
    },
    {
        'name':
        'has_loca',
        'df':
        eng_df,
        'index':
        'eng_TAM',
        'columns': ['clause_rela', 'has_loca'],
        'examples': [{
            'query': ('eng_TAM == "PAST..IND" '
                      'and has_loca == 1 '
                      'and clause_rela == "Main"')
        }],
    },
    {
        'name':
        'has_time',
        'df':
        eng_df,
        'index':
        'eng_TAM',
        'columns': ['clause_rela', 'has_time'],
        'examples': [{
            'query': ('eng_TAM == "PAST..IND" '
                      'and has_time  == 1 '
                      'and clause_rela == "Main" ')
        }],
    },
], snakemake.output.dir)
コード例 #31
0
from load_dfs import DfLoader
from analysis import run_analyses

# Build the loader from the data directory Snakemake passes to this script,
# then pull the frame produced by ``eng_agree()``.
DfLoad = DfLoader(snakemake.input.data_dir)
eng_df = DfLoad.eng_agree()
# Alternative loader calls that were left disabled in this script:
# DfLoad.esv(), DfLoad.niv(), DfLoad.eng_both(), DfLoad.eng_disagree()

# Selects rows tagged "PAST" whose ``niv`` or ``esv`` value matches the
# regex ".*became" (presumably these are the two translation-text columns
# -- confirm against the loader).
# NOTE(review): other analyses in this project compare eng_TAM against
# labels such as "PAST..IND"; verify that a bare "PAST" exists in this frame.
inchoative_query = (
    'eng_TAM == "PAST" '
    'and (niv.str.match(".*became") '
    '| esv.str.match(".*became"))'
)

inchoative_examples = [
    {
        'query': inchoative_query,
        'spread': -1,  # i.e. all (per the original author's note)
    },
]

# Single analysis spec: a name, the frame to analyse, and the example queries.
inchoatives_analysis = {
    'name': 'inchoatives',
    'df': eng_df,
    'examples': inchoative_examples,
}

run_analyses([inchoatives_analysis], snakemake.output.dir)