elif opt in ('-r'): remove_type = arg.strip()
        elif opt in ('-v', '--verbose'): config.verbose = True
        elif opt in ('--debug'): config.debug = True

    ## create metadata: one record holding the options used for this run
    setting = {
        "feature_name": "keyword_emotion",
        "keyword_type": config.keyword_type,
        "lemma": config.lemma,
        "feature_value_type": config.featureValueType,
        "cutoff_percentage": config.cutoffPercentage,
        "remove": remove_type
    }

    ## print confirm message (called before the insert below)
    setting_pairs = setting.items()
    config.print_confirm(setting_pairs, bar=40, halt=True)

    ## insert metadata and keep the returned id in string form
    inserted_id = co_setting.insert(setting)
    setting_id = str(inserted_id)

    ## specify keyword lexicon
    ## the collection is picked by (keyword_type, lemma on/off); any other
    ## keyword_type leaves co_keyword_lexicon unassigned
    lexicon_names = {
        ('basic', True): 'lexicon.keyword.basic.w_lemma',
        ('basic', False): 'lexicon.keyword.basic.wo_lemma',
        ('extend', True): 'lexicon.keyword.extend.w_lemma',
        ('extend', False): 'lexicon.keyword.extend.wo_lemma',
    }
    lexicon_key = (config.keyword_type, bool(config.lemma))
    if lexicon_key in lexicon_names:
        co_keyword_lexicon = db[lexicon_names[lexicon_key]]
		opts, args = getopt.getopt(sys.argv[1:],'hb:m:e:l:v',['help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'min_count=', 'verbose'])
	except getopt.GetoptError:
		config.help(config.patternPositionFeat_name, addon=add_opts, exit=2)

	## apply the parsed command-line options to config
	## - ('-b') is a plain string, so `opt in ('-b')` was a substring test;
	##   real tuples are used so only the exact option names match
	## - the long options declared in the getopt call (--begPercentage,
	##   --midPercentage, --endPercentage, --min_count) are handled here
	##   instead of being accepted and then ignored
	## - '--limit' is kept for backward compatibility with the old check
	for opt, arg in opts:
		if opt in ('-h', '--help'): config.help(config.patternPositionFeat_name, addon=add_opts)
		elif opt in ('-b', '--begPercentage'): config.begPercentage = int(arg.strip())
		elif opt in ('-m', '--midPercentage'): config.midPercentage = int(arg.strip())
		elif opt in ('-e', '--endPercentage'): config.endPercentage = int(arg.strip())
		elif opt in ('-l', '--min_count', '--limit'): config.min_count = int(arg.strip())
		elif opt in ('-v', '--verbose'): config.verbose = True

	## insert metadata
	## "section" joins the three percentages as b<beg>_m<mid>_e<end>
	section_parts = [
		"b" + str(config.begPercentage),
		"m" + str(config.midPercentage),
		"e" + str(config.endPercentage)
	]
	setting = {
		"feature_name": "pattern_position",
		"section": "_".join(section_parts),
		"min_count": config.min_count
	}

	## print confirm message (called before the insert below)
	setting_pairs = setting.items()
	config.print_confirm(setting_pairs, bar=40, halt=True)

	## insert metadata and keep the returned id in string form
	inserted_id = co_setting.insert(setting)
	setting_id = str(inserted_id)

	## run
	import time
	s = time.time()	
	create_pattern_features()
	print 'Time total:',time.time() - s,'sec'
	## collection handles looked up by the names held in config
	co_docscore = db[ config.co_docscore_name ]
	co_results = db[ config.co_results_name ]

	## confirm message
	docscore_state = '(existed)' if co_docscore_existed else '(none)'
	results_state = '(existed)' if mdoc_results_existed else '(none)'

	## NOTE(review): the overwrite note names co_docscore although the
	## "insert collection" row is co_results -- confirm which one is dropped
	if co_docscore_existed:
		drop_note = '!Note: This will drop the collection [ '+config.co_docscore_name+' ]'
	else:
		drop_note = ''

	confirm_msg = [
		(config.ps_function_name, config.ps_function_type),
		(config.ds_function_name, config.ds_function_type),
		(config.sig_function_name, config.sig_function_type),
		(config.limit_name, config.min_count),
		('fetch collection', config.co_docscore_name, docscore_state),
		('insert collection', config.co_results_name, results_state),
		('verbose', config.verbose),
		('overwrite', config.overwrite, { True: color.render(drop_note, 'red'), False: '' })
	]

	## halt is passed as True only when skip_eval is falsy
	config.print_confirm(confirm_msg, bar=40, halt=not skip_eval)


	if skip_eval:
		## (warning) the destination already exists
		warn_target = color.render(config.co_results_name+' > '+cfg, 'red')
		print >> sys.stderr, '(warning) destination mongo doc', warn_target,'is already existed'
		print >> sys.stderr, '\t  use -o or --overwrite to force update'
	else:
		evals()

	## skip_eval is read again here, after evals() may have run
	## NOTE(review): average() is reached only when skip_eval is truthy -- confirm this is intended
	if skip_eval:
		average()
		### =======================================
		## check destination files/folder
		### =======================================
		## token is fusion_id when both fusion_mode and fusion_id are truthy,
		## otherwise setting_id
		use_fusion_token = fusion_mode and fusion_id
		dest_token = fusion_id if use_fusion_token else setting_id
		new_pathes = check_destination(pathes, token=dest_token, ext='txt')

		## confirm message
		feature_state = '(ok)' if co_feature_existed else '(none)'
		confirm_msg = [
			('[opt]\tfetch collection', color.render(co_feature_name, 'y'), feature_state),
			('[opt]\tdestination', color.render(pathes['_root_'], 'y') ),
			('[opt]\tverbose', config.verbose ),
			('[opt]\toverwrite', config.overwrite)
		]
		config.print_confirm(confirm_msg, bar=40, halt=True)
		
		# -- run --
		## generate svm vectors
		vector_kind = 'fused' if fusion_mode and fusion_id else ''
		## the trailing comma suppresses the newline so 'done.' lands on the
		## same line; the flush runs before the generate_vectors() call
		print >> sys.stderr, 'generating ', vector_kind, ' vectors...',
		sys.stderr.flush()
		vectors = generate_vectors()
		print >> sys.stderr, 'done.'

		## generate test and train files from the vectors built above
		print >> sys.stderr, 'generate test train files...'
		generate_test_train_files(vectors, new_pathes)
		sys.stderr.flush()
		print >> sys.stderr, 'done.'

		if fusion_mode:
	## collection handles looked up by the names held in config
	patscore_name, docscore_name = config.co_patscore_name, config.co_docscore_name
	co_patscore = db[patscore_name]
	co_docscore = db[docscore_name]

	# exit with status -1 when check_indexes returns a falsy result for the 'pattern' index
	required_indexes = ['pattern']
	good_index = check_indexes(target=co_patscore, indexes=required_indexes, auto=False)
	if not good_index:
		exit(-1)

	## confirm message
	patscore_state = '(existed)' if co_patscore_existed else '(none)'
	docscore_state = '(existed)' if co_docscore_existed else '(none)'

	## text shown for overwrite=True; empty when co_docscore does not exist yet
	if co_docscore_existed:
		drop_note = '!Note: This will drop the collection [ '+config.co_docscore_name+' ]'
	else:
		drop_note = ''

	confirm_msg = [
		(config.ps_function_name, config.ps_function_type),
		(config.ds_function_name, config.ds_function_type),
		(config.sig_function_name, config.sig_function_type),
		(config.smoothing_name, config.smoothing_type),
		(config.limit_name, config.min_count),
		('fetch collection', config.co_patscore_name, patscore_state),
		('insert collection', config.co_docscore_name, docscore_state),
		('verbose', config.verbose),
		('overwrite', config.overwrite, { True: color.render(drop_note, 'red'), False: '' } )
	]

	config.print_confirm(confirm_msg, bar=40, halt=True)
	
	## run
	import time
	s = time.time()
	update_all_document_scores()
	print 'Time total:',time.time() - s,'sec'

				
Ejemplo n.º 6
0
         '(existed)' if co_docscore_existed else '(none)'),
        ('insert collection', config.co_results_name,
         '(existed)' if mdoc_results_existed else '(none)'),
        ('verbose', config.verbose),
        ('overwrite', config.overwrite, {
            True:
            color.render(
                '!Note: This will drop the collection [ ' +
                config.co_docscore_name + ' ]' if co_docscore_existed else '',
                'red'),
            False:
            ''
        })
    ]

    ## halt is passed as True only when skip_eval is falsy
    config.print_confirm(confirm_msg, bar=40, halt=not skip_eval)

    if skip_eval:
        ## (warning) the destination already exists
        warn_target = color.render(config.co_results_name + ' > ' + cfg, 'red')
        print >> sys.stderr, '(warning) destination mongo doc', warn_target, 'is already existed'
        print >> sys.stderr, '\t  use -o or --overwrite to force update'
    else:
        evals()

    ## skip_eval is read again here, after evals() may have run
    ## NOTE(review): average() is reached only when skip_eval is truthy -- confirm this is intended
    if skip_eval:
        average()