## input arguments
	import getopt

	add_opts = [
		('-b', ['-b: percentage of beginning section']),
		('-m', ['-m: percentage of middle section']),
		('-e', ['-e: percentage of ending section']),
		('-l', ['-l: minimum occurrence of a pattern',
				'                 n: at least occurs < n > times for each pattern'])
	]

	## Parse the command line. Every long option declared in the getopt list
	## below has a matching test in the loop: previously '--begPercentage',
	## '--midPercentage', '--endPercentage' and '--min_count' were accepted by
	## getopt but silently ignored ('--limit' was tested instead of '--min_count').
	try:
		opts, args = getopt.getopt(
			sys.argv[1:], 'hb:m:e:l:v',
			['help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'min_count=', 'verbose'])
	except getopt.GetoptError:
		## unknown option or missing value: hand over to the project help routine
		config.help(config.patternPositionFeat_name, addon=add_opts, exit=2)

	## Real tuples are used for membership tests; a parenthesised single string
	## such as ('-b') is just a string, which turns `in` into a substring test.
	for opt, arg in opts:
		if opt in ('-h', '--help'):
			config.help(config.patternPositionFeat_name, addon=add_opts)
		elif opt in ('-b', '--begPercentage'):
			config.begPercentage = int(arg.strip())
		elif opt in ('-m', '--midPercentage'):
			config.midPercentage = int(arg.strip())
		elif opt in ('-e', '--endPercentage'):
			config.endPercentage = int(arg.strip())
		elif opt in ('-l', '--min_count'):
			config.min_count = int(arg.strip())
		elif opt in ('-v', '--verbose'):
			config.verbose = True

	## insert metadata
	setting = { 
		"feature_name": "pattern_position", 
		"section": "b"+ str(config.begPercentage) + "_m" + str(config.midPercentage) + "_e" + str(config.endPercentage), 
		"min_count": config.min_count 
	}
	
	return True


if __name__ == '__main__':
	## Command-line entry point: <setting_id> [-h|--help] [-v|--verbose] [-o|--overwrite]
	import getopt

	## extra help entries passed to config.help
	add_opts = [
		('setting_id', ['<setting_id>: specify setting ID(s) (e.g., 537086fcd4388c7e81676914, or 537086fcd4388c7e81676914,537c6c90d4388c0e27069e7b)',
					   '           which can be retrieved from the mongo collection features.settings' ]),
	]

	## When the first argument is not an option it is the positional setting id,
	## so option parsing starts right after it.
	arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1
	try:
		opts, args = getopt.getopt(sys.argv[arg_idx:],'hvo',['help', 'verbose', 'overwrite'])
		## NOTE(review): if argv[1] is itself an option (e.g. '-v') it is still
		## stored as the setting id here -- confirm whether that is intended
		setting_id_str = sys.argv[1].strip()
	except (getopt.GetoptError, IndexError):
		## GetoptError: invalid option; IndexError: no argument at all.
		## A bare `except:` used to sit here and also caught SystemExit and
		## KeyboardInterrupt.
		config.help('toSVM', addon=add_opts, args=['<setting_id>'], exit=2)

	## read options
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help('toSVM',args=['setting_id'], addon=add_opts)
		elif opt in ('-v','--verbose'): config.verbose = True
		elif opt in ('-o','--overwrite'): config.overwrite = True

	## set log level
	loglevel = logging.DEBUG if config.verbose else logging.INFO
	logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel)

	run()
	import getopt

	add_opts = [
		('setting_id', ['<setting_id>: specify a setting ID (e.g., 537086fcd4388c7e81676914)', 
					   '           which can be retrieved from the mongo collection features.settings' ]),
		('--quiet', 	['--quiet: run svm in quiet mode']), 
		('--core', 		['-c, --core: multi-core for svm']), 
		('--param', 	['--param: parameter string for libsvm (e.g., use "c4b1" or "-c 4 -b 1" to represent the libsvm parameters -c 4 -b 1)'])		
	]

	## When the first argument is not an option it is the positional <setting_id>,
	## so option parsing starts right after it.
	arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1
	try:
		opts, args = getopt.getopt(sys.argv[arg_idx:],'hvop:q',['help', 'verbose', 'overwrite', 'list', 'param=','multi=', 'quiet'])
		setting_id = sys.argv[1].strip()
	except (getopt.GetoptError, IndexError):
		## GetoptError: invalid option; IndexError: no argument at all.
		## A bare `except:` used to sit here and also caught SystemExit and
		## KeyboardInterrupt.
		config.help('run_svm', addon=add_opts, args=['<setting_id>'], exit=2)


	svm_params = []
	## read options
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help('run_svm', addon=add_opts)
		elif opt in ('-q','--quiet'): quiet_mode = True
		## NOTE(review): '-s'/'--scale' is not declared in the getopt spec above,
		## so this branch cannot be reached; '--list' is declared but not handled
		elif opt in ('-s','--scale'): scale_mode = True
		elif opt in ('-p','--param'): svm_params = parse_params(arg.strip())
		## one-element tuple: ('--multi') is a plain string and would make
		## `in` a substring test
		elif opt in ('--multi',): cores = int(arg.strip())
		elif opt in ('-v','--verbose'): config.verbose = True
		elif opt in ('-o','--overwrite'): config.overwrite = True

	## set log level
	loglevel = logging.DEBUG if config.verbose else logging.INFO
	## input arguments
	import getopt
	
	add_opts = [
		('-n', ['-n or --minCount: filter out patterns with minimum count',
			    '                  k: minimum count']),
		('--debug', ['--debug: run in debug mode']),
		('--remove', ['--remove: remove self count'])
	]

	## Parse the command line; on an invalid option config.help is called with exit=2.
	try:
		# opts, args = getopt.getopt(sys.argv[1:],'hf:n:c:vr:',['help', 'featureValueType=', 'minCount=', 'cut=', 'verbose', 'debug'])
		## NOTE(review): short 'r:' requires a value while long 'remove' takes none -- confirm which is intended
		opts, args = getopt.getopt(sys.argv[1:],'hn:vr:o',['help', 'minCount=', 'verbose', 'debug', 'overwrite', 'remove'])
	except getopt.GetoptError:
		config.help(program, addon=add_opts, exit=2)

	## Copy the parsed options into config / local flags.
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help(program, addon=add_opts)
		# elif opt in ('-f', '--featureValueType'): config.featureValueType = arg.strip()
		elif opt in ('-n', '--minCount'): config.minCount = int( arg.strip() )
		## NOTE(review): '-c'/'--cut' are not in the getopt spec above, so this branch looks unreachable
		elif opt in ('-c', '--cut'): config.cutoffPercentage = int( arg.strip() )
		elif opt in ('-v','--verbose'): config.verbose = True
		elif opt in ('-o','--overwrite'): config.overwrite = True
		## ('--remove') is a plain string, so `in` is a substring test here: '-r' also matches it
		elif opt in ('--remove'): remove = True
		## same string-membership form as above
		elif opt in ('--debug'): config.debug = True

	loglevel = logging.DEBUG if config.verbose else logging.INFO
	logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel)

	logging.debug('connecting mongodb at %s/%s' % (config.mongo_addr, config.db_name))
	co_feature = db['features.keyword']

	## input arguments
	import getopt

	add_opts = [
		('-k', ['-k: keyword set in WordNetAffect',
				'                 0: basic',
				'                 1: extend']),
		('--lemma', ['--lemma: use word lemma when looking for keywords'])
	]

	## getopt specification: -h/--help, -k/--keyword_type <0|1>, --lemma, -v/--verbose
	short_spec = 'hk:v'
	long_spec = ['help', 'keyword_type=', 'lemma', 'verbose']
	try:
		opts, args = getopt.getopt(sys.argv[1:], short_spec, long_spec)
	except getopt.GetoptError:
		config.help(config.keywordFeat_name, addon=add_opts, exit=2)

	## numeric -k value -> keyword set name; any other number leaves config untouched
	keyword_sets = {0: 'basic', 1: 'extend'}

	for opt, arg in opts:
		if opt in ('-h', '--help'):
			config.help(config.keywordFeat_name, addon=add_opts)
		elif opt in ('-k','--keyword_type'):
			keyword_level = int(arg.strip())
			if keyword_level in keyword_sets:
				config.keyword_type = keyword_sets[keyword_level]
		elif opt == '--lemma':
			## only '--lemma' can reach this test with the spec above
			config.lemma = True
		elif opt in ('-v','--verbose'):
			config.verbose = True

	## insert metadata
	setting = { 
		"feature_name": "keyword", 
		"keyword_type": config.keyword_type,
		"lemma": config.lemma 
	}
	import getopt

	add_opts = [
		('--setting', 	['--setting: specify a setting ID (e.g., 537b00e33681df445d93d57e)', 
					   	 '           which can be retrieved from the mongo collection features.settings' ]),
		('--all', 		['-a, --all: evaluate and update all current experiments, default: '+str(update_all)+' )']), 
		('--param', 	['--param: parameter string for libsvm (e.g., use "b1c4", default: '+param+' )']),
		('--path', 		['-p, --path: path to local files (default: '+root+' )']),
		('--inter', 	['-i, --inter: intersection with 2005 Mishne05'])
	]

	## Parse the command line.
	## 'p:' and 'path=' are now part of the specification: -p/--path is
	## documented in add_opts and handled in the loop below, but it was missing
	## here, so getopt rejected it with GetoptError every time.
	try:
		opts, args = getopt.getopt(sys.argv[1:],'hvaip:',['help', 'verbose', 'setting=', 'param=', 'path=', 'all', 'inter'])
	except getopt.GetoptError:
		config.help('run_svm', addon=add_opts, exit=2)

	## read options
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help('run_svm', addon=add_opts)
		elif opt in ('-a','--all'): update_all = True
		elif opt in ('-i','--inter'): intersection = True
		## one-element tuples: with the plain string ('--param') the test is a
		## substring match, and '-p' would be taken for '--param'
		elif opt in ('--param',): param = arg.strip()
		elif opt in ('-p','--path'): root = arg.strip()
		elif opt in ('--setting',): setting_id = arg.strip()
		elif opt in ('-v','--verbose'): config.verbose = True

	## select collections
	co_svm_eval = db[config.co_svm_eval_name]
	co_svm_out = db[config.co_svm_out_name]
	co_svm_gold = db[config.co_svm_gold_name]
	print >> sys.stdout, '='*40
	print >> sys.stdout, 'Avg. LJ40K:', round(avg_LJ40K,4)
	print >> sys.stdout, 'Avg. Mishne05:', round(avg_Mishne05,4)
	print >> sys.stdout, 'Avg. Overall:', round(avg_shared,4)
	# print >> sys.stderr, avg_LJ40K, avg_Mishne05, avg_shared

	return avg_LJ40K, avg_Mishne05, avg_shared

if __name__ == '__main__':

	import getopt

	## getopt specification for the evaluation script
	short_spec = 'hp:d:g:s:l:vo'
	long_spec = ['help','ps_function=', 'ds_function=', 'sig_function=', 'smoothing=', 'limit=', 'verbose', 'overwrite']
	try:
		opts, args = getopt.getopt(sys.argv[1:], short_spec, long_spec)
	except getopt.GetoptError:
		config.help('evaluation', exit=2)

	## options carrying an integer value -> config attribute receiving it
	int_targets = {
		'-p': 'ps_function_type', '--ps_function': 'ps_function_type',
		'-d': 'ds_function_type', '--ds_function': 'ds_function_type',
		'-g': 'sig_function_type', '--sig_function': 'sig_function_type',
		'-s': 'smoothing_type', '--smoothing': 'smoothing_type',
		'-l': 'min_count', '--limit': 'min_count',
	}
	## value-less switches -> config attribute set to True
	switch_targets = {
		'-v': 'verbose', '--verbose': 'verbose',
		'-o': 'overwrite', '--overwrite': 'overwrite',
	}

	for opt, arg in opts:
		if opt in ('-h', '--help'):
			config.help('evaluation')
		elif opt in int_targets:
			setattr(config, int_targets[opt], int(arg.strip()))
		elif opt in switch_targets:
			setattr(config, switch_targets[opt], True)

	## fetch from collection: the name is the prefix followed by the option
	## strings returned by config.getOpts, joined with '_'
	name_parts = [config.co_docscore_prefix]
	name_parts = name_parts + config.getOpts(fields=config.opt_fields[config.ev_name], full=False)
	config.co_docscore_name = '_'.join(name_parts)

	# if cannot find the fetch target collection
			mdoc = { 
				'udocID': doc['udocID'], 
				'gold_emotion': gold_emotion, 
				'scores': scores
			}
			co_docscore.insert( mdoc )

if __name__ == '__main__':

	import getopt

	try:
		opts, args = getopt.getopt(
			sys.argv[1:],
			'hp:d:g:s:l:vo',
			['help','ps_function=', 'ds_function=', 'sig_function=', 'smoothing=', 'limit=', 'verbose', 'overwrite'])
	except getopt.GetoptError:
		config.help(config.ds_name, exit=2)

	for opt, arg in opts:
		if opt in ('-h', '--help'):
			config.help(config.ds_name)
		elif opt in ('-v','--verbose'):
			config.verbose = True
		elif opt in ('-o','--overwrite'):
			config.overwrite = True
		else:
			## every other option in the specification above carries an integer value
			number = int(arg.strip())
			if opt in ('-p','--ps_function'):
				config.ps_function_type = number
			elif opt in ('-d','--ds_function'):
				config.ds_function_type = number
			elif opt in ('-g','--sig_function'):
				config.sig_function_type = number
			elif opt in ('-s','--smoothing'):
				config.smoothing_type = number
			elif opt in ('-l','--limit'):
				config.min_count = number

	## select mongo collections (names come from config)
	co_emotions = db[config.co_emotions_name]
	co_docs = db[config.co_docs_name]
	co_pats = db[config.co_pats_name]
		docIDs.append( {'filename': fn, 'topic': topic, 'ldocID': ldocID, 'udocID': udocID} )
	return docIDs

if __name__ == '__main__':
	## Command-line entry point: -p/--path <corpus path>, -d/--database <db name>

	import getopt

	## extra help entries passed to config.help
	add_opts = [
		('--path', ['-p or --path: specify the input corpus path']),
		('--database', ['-d or --database: specify the destination database name']),
	]

	try:
		opts, args = getopt.getopt(sys.argv[1:],'hp:d:',['help','path=', 'database='])
	except getopt.GetoptError:
		config.help('extract_dependency', addon=add_opts, exit=2)

	## read options
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help('extract_dependency', addon=add_opts)
		elif opt in ('-p','--path'): corpus_root = arg.strip()
		elif opt in ('-d','--database'): config.db_name = arg.strip()

	## NOTE(review): corpus_root is only assigned by -p/--path in this block;
	## presumably a default exists earlier in the file -- confirm
	if not corpus_root:
		## single parenthesised argument: prints the same text under Python 2 and 3
		print('specify the input corpus path: e.g., python extract_dependency.py -p /corpus/NTCIR/')
		exit(-1)

	## This guard used to re-test corpus_root (copy/paste slip), so a missing
	## database name was never reported; it now tests the value set by -d.
	if not config.db_name:
		print('specify the destination database name: e.g., python extract_dependency.py -d NTCIR')
		exit(-1)
Esempio n. 10
0
    co_patscore.create_index('pattern')

    print >> sys.stderr, 'done.'


if __name__ == '__main__':

    import getopt

    ## getopt specification: -h, -p/--ps_function <int>, -s/--smoothing <int>, -v, -o
    short_spec = 'hp:s:vo'
    long_spec = ['help', 'ps_function=', 'smoothing=', 'verbose', 'overwrite']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_spec, long_spec)
    except getopt.GetoptError:
        config.help(config.ps_name, exit=2)

    ## options carrying an integer value -> config attribute receiving it
    int_targets = {
        '-p': 'ps_function_type', '--ps_function': 'ps_function_type',
        '-s': 'smoothing_type', '--smoothing': 'smoothing_type',
    }
    ## value-less switches -> config attribute set to True
    switch_targets = {
        '-v': 'verbose', '--verbose': 'verbose',
        '-o': 'overwrite', '--overwrite': 'overwrite',
    }

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            config.help(config.ps_name)
        elif opt in int_targets:
            setattr(config, int_targets[opt], int(arg.strip()))
        elif opt in switch_targets:
            setattr(config, switch_targets[opt], True)

    # check if fetch source existed
    co_lexicon_existed = config.co_lexicon_name in db.collection_names()
    if not co_lexicon_existed:
		mongo_collection.save(doc)


if __name__ == '__main__':

	import getopt

	## extra help entries passed to config.help
	add_opts = [
		('--database', ['-d or --database: specify the destination database name']),
		('--topic', ['--topic: identify as "topic" rather than "emotion"']),
	]

	short_spec = 'hd:v'
	long_spec = ['help','database=','topic','verbose']
	try:
		opts, args = getopt.getopt(sys.argv[1:], short_spec, long_spec)
	except getopt.GetoptError:
		config.help('extract_pattern', addon=add_opts, exit=2)

	## read options
	for opt, arg in opts:
		if opt in ('-h', '--help'):
			config.help('extract_pattern', addon=add_opts)
		elif opt in ('-d','--database'):
			config.db_name = arg.strip()
		elif opt in ('-v','--verbose'):
			config.verbose = True
		elif opt == '--topic':
			## only '--topic' can reach this test with the spec above
			topic_or_emotion = 'topic'

	## select the database named in config and its two collections
	db = mc[config.db_name]
	co_deps = db[config.co_deps_name]
	co_pats = db[config.co_pats_name]

	# # rule = [('subj',1), ('cop', 1)]
	# rule = [('prep', 0), ('subj',0), ('obj',0)]
	# # targets = ['JJ']
	## select mongo collections
	co_emotions = db[config.co_emotions_name]
	co_docs = db[config.co_docs_name]
	co_pats = db[config.co_pats_name]
	co_nestedLexicon = db['lexicon.nested']

	## target mongo collections
	co_setting = db['features.settings']
	co_feature = db['features.pattern']

	## input arguments
	import getopt
	## Parse the command line: -h/--help, -l/--min_count <int>, -v/--verbose
	try:
		opts, args = getopt.getopt(sys.argv[1:],'hl:v',['help', 'min_count=', 'verbose'])
	except getopt.GetoptError:
		config.help(config.patternFeat_name, exit=2)

	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help(config.patternFeat_name)
		## The declared long option is '--min_count'. The loop used to test
		## '--limit', which getopt never returns, so '--min_count N' was
		## accepted but ignored.
		elif opt in ('-l','--min_count'): config.min_count = int(arg.strip())
		elif opt in ('-v','--verbose'): config.verbose = True

	## insert metadata
	setting = { 
		"feature_name": "pattern", 
		"min_count": config.min_count 
	}

	## print confirm message
	config.print_confirm(setting.items(), bar=40, halt=True)
	
Esempio n. 13
0
                ]),
                ('-r', [
                    '-r: remove self count',
                    "                 0: dont't remove anything",
                    '                 1: minus-one',
                    '                 f: minus-frequency'
                ]), ('-p', ['-p: use position lexicon'])]

    ## Parse the command line; on an invalid option config.help is called with exit=2.
    try:
        ## NOTE(review): short 'c:' requires a value, but the long form 'cut' is
        ## declared without '=' and therefore takes none -- confirm which is intended.
        ## 'r:' and 'p' have no long counterpart in this list.
        opts, args = getopt.getopt(sys.argv[1:], 'hb:m:e:f:n:c:r:pv', [
            'help', 'begPercentage=', 'midPercentage=', 'endPercentage=',
            'featureValueType=', 'minCount=', 'cut', 'verbose'
        ])
    except getopt.GetoptError:
        config.help(config.patternEmotionPositionFeat_name,
                    addon=add_opts,
                    exit=2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            config.help(config.patternEmotionPositionFeat_name, addon=add_opts)
        elif opt in ('-b'):
            config.begPercentage = int(arg.strip())
        elif opt in ('-m'):
            config.midPercentage = int(arg.strip())
        elif opt in ('-e'):
            config.endPercentage = int(arg.strip())
        elif opt in ('-f'):
            config.featureValueType = arg.strip()
        elif opt in ('-n'):
            config.minCount = int(arg.strip())
        '<setting_id>: specify a setting ID (e.g., 537086fcd4388c7e81676914)',
        '           which can be retrieved from the mongo collection features.settings'
    ]), ('--quiet', ['--quiet: run svm in quiet mode']),
                ('--core', ['-c, --core: multi-core for svm']),
                ('--param', [
                    '--param: parameter string for libsvm (e.g., use "c4b1" or "-c 4 -b 1" to represent the libsvm parameters -c 4 -b 1)'
                ])]

    ## When the first argument is not an option it is the positional <setting_id>,
    ## so option parsing starts right after it.
    arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1
    try:
        opts, args = getopt.getopt(sys.argv[arg_idx:], 'hvop:q', [
            'help', 'verbose', 'overwrite', 'list', 'param=', 'multi=', 'quiet'
        ])
        setting_id = sys.argv[1].strip()
    except (getopt.GetoptError, IndexError):
        ## GetoptError: invalid option; IndexError: no argument at all.
        ## A bare `except:` used to sit here and also caught SystemExit and
        ## KeyboardInterrupt.
        config.help('run_svm', addon=add_opts, args=['<setting_id>'], exit=2)

    svm_params = []
    ## read options
    for opt, arg in opts:
        if opt in ('-h', '--help'): config.help('run_svm', addon=add_opts)
        elif opt in ('-q', '--quiet'): quiet_mode = True
        ## NOTE(review): '-s'/'--scale' is not declared in the getopt spec above,
        ## so this branch cannot be reached; '--list' is declared but not handled
        elif opt in ('-s', '--scale'): scale_mode = True
        elif opt in ('-p', '--param'): svm_params = parse_params(arg.strip())
        ## one-element tuple: ('--multi') is a plain string and would make
        ## `in` a substring test
        elif opt in ('--multi',): cores = int(arg.strip())
        elif opt in ('-v', '--verbose'): config.verbose = True
        elif opt in ('-o', '--overwrite'): config.overwrite = True

    ## set log level
    loglevel = logging.DEBUG if config.verbose else logging.INFO
    logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel)
                ('-m', ['-m: percentage of middle section']),
                ('-e', ['-e: percentage of ending section']),
                ('-k', [
                    '-k: keyword set in WordNetAffect',
                    '                 0: basic', '                 1: extend'
                ]),
                ('--lemma',
                 ['--lemma: use word lemma when looking for keywords'])]

    ## Parse the command line. Every long option declared in the list below has
    ## a matching test in the loop: previously '--begPercentage',
    ## '--midPercentage' and '--endPercentage' were accepted by getopt but
    ## silently ignored.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hb:m:e:k:v', [
            'help', 'begPercentage=', 'midPercentage=', 'endPercentage=',
            'keyword_type=', 'lemma', 'verbose'
        ])
    except getopt.GetoptError:
        config.help(config.keywordPositionFeat_name, addon=add_opts, exit=2)

    ## Real tuples are used for membership tests; a parenthesised single string
    ## such as ('-b') is just a string, which turns `in` into a substring test.
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            config.help(config.keywordPositionFeat_name, addon=add_opts)
        elif opt in ('-b', '--begPercentage'):
            config.begPercentage = int(arg.strip())
        elif opt in ('-m', '--midPercentage'):
            config.midPercentage = int(arg.strip())
        elif opt in ('-e', '--endPercentage'):
            config.endPercentage = int(arg.strip())
        elif opt in ('-k', '--keyword_type'):
            ## 0 selects the 'basic' keyword set, 1 the 'extend' set; any other
            ## number leaves config.keyword_type unchanged
            if int(arg.strip()) == 0: config.keyword_type = 'basic'
            elif int(arg.strip()) == 1: config.keyword_type = 'extend'
        elif opt in ('--lemma',): config.lemma = True
        elif opt in ('-v', '--verbose'): config.verbose = True
    co_emotions = db[config.co_emotions_name]
    co_docs = db[config.co_docs_name]
    co_pats = db[config.co_pats_name]
    co_nestedLexicon = db['lexicon.nested']

    ## target mongo collections
    co_setting = db['features.settings']
    co_feature = db['features.pattern']

    ## input arguments
    import getopt
    ## Parse the command line: -h/--help, -l/--min_count <int>, -v/--verbose
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hl:v',
                                   ['help', 'min_count=', 'verbose'])
    except getopt.GetoptError:
        config.help(config.patternFeat_name, exit=2)

    for opt, arg in opts:
        if opt in ('-h', '--help'): config.help(config.patternFeat_name)
        ## The declared long option is '--min_count'. The loop used to test
        ## '--limit', which getopt never returns, so '--min_count N' was
        ## accepted but ignored.
        elif opt in ('-l', '--min_count'): config.min_count = int(arg.strip())
        elif opt in ('-v', '--verbose'): config.verbose = True

    ## insert metadata
    setting = {"feature_name": "pattern", "min_count": config.min_count}

    ## print confirm message
    config.print_confirm(setting.items(), bar=40, halt=True)

    ## insert metadata
    setting_id = str(co_setting.insert(setting))