elif opt in ('-r'): remove_type = arg.strip() elif opt in ('-v', '--verbose'): config.verbose = True elif opt in ('--debug'): config.debug = True ## create metadata setting = { "feature_name": "keyword_emotion", "keyword_type": config.keyword_type, "lemma": config.lemma, "feature_value_type": config.featureValueType, "cutoff_percentage": config.cutoffPercentage, "remove": remove_type } ## print confirm message config.print_confirm(setting.items(), bar=40, halt=True) ## insert metadata setting_id = str(co_setting.insert(setting)) ## specify keyword lexicon if config.keyword_type == 'basic': if config.lemma: co_keyword_lexicon = db['lexicon.keyword.basic.w_lemma'] else: co_keyword_lexicon = db['lexicon.keyword.basic.wo_lemma'] elif config.keyword_type == 'extend': if config.lemma: co_keyword_lexicon = db['lexicon.keyword.extend.w_lemma'] else: co_keyword_lexicon = db['lexicon.keyword.extend.wo_lemma']
opts, args = getopt.getopt(sys.argv[1:],'hb:m:e:l:v',['help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'min_count=', 'verbose']) except getopt.GetoptError: config.help(config.patternPositionFeat_name, addon=add_opts, exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help(config.patternPositionFeat_name, addon=add_opts) elif opt in ('-b'): config.begPercentage = int(arg.strip()) elif opt in ('-m'): config.midPercentage = int(arg.strip()) elif opt in ('-e'): config.endPercentage = int(arg.strip()) elif opt in ('-l','--limit'): config.min_count = int(arg.strip()) elif opt in ('-v','--verbose'): config.verbose = True ## insert metadata setting = { "feature_name": "pattern_position", "section": "b"+ str(config.begPercentage) + "_m" + str(config.midPercentage) + "_e" + str(config.endPercentage), "min_count": config.min_count } ## print confirm message config.print_confirm(setting.items(), bar=40, halt=True) ## insert metadata setting_id = str(co_setting.insert( setting )) ## run import time s = time.time() create_pattern_features() print 'Time total:',time.time() - s,'sec'
# Collection handles: document scores are fetched, results are inserted
# (see the 'fetch collection' / 'insert collection' rows below).
co_docscore = db[config.co_docscore_name]
co_results = db[config.co_results_name]

## confirm message
docscore_status = '(existed)' if co_docscore_existed else '(none)'
results_status = '(existed)' if mdoc_results_existed else '(none)'

# Text shown next to the overwrite flag when it is True; empty when the
# docscore collection does not exist.
# NOTE(review): the note names co_docscore_name although the collection
# inserted into here is co_results_name -- confirm which one is dropped.
if co_docscore_existed:
    drop_note = '!Note: This will drop the collection [ ' + config.co_docscore_name + ' ]'
else:
    drop_note = ''

confirm_msg = [
    (config.ps_function_name, config.ps_function_type),
    (config.ds_function_name, config.ds_function_type),
    (config.sig_function_name, config.sig_function_type),
    (config.limit_name, config.min_count),
    ('fetch collection', config.co_docscore_name, docscore_status),
    ('insert collection', config.co_results_name, results_status),
    ('verbose', config.verbose),
    ('overwrite', config.overwrite, {True: color.render(drop_note, 'red'), False: ''}),
]

# halt is True exactly when skip_eval is falsy.
config.print_confirm(confirm_msg, bar=40, halt=not skip_eval)

if skip_eval:
    ## (warning) destination's already existed
    dest_label = config.co_results_name + ' > ' + cfg
    print >> sys.stderr, '(warning) destination mongo doc', color.render(dest_label, 'red'), 'is already existed'
    print >> sys.stderr, '\t use -o or --overwrite to force update'

# The three skip_eval tests are kept separate and in their original order.
if not skip_eval:
    evals()

if skip_eval:
    average()
### ======================================= ## check destination files/folder ### ======================================= if fusion_mode and fusion_id: new_pathes = check_destination(pathes, token=fusion_id, ext='txt') else: new_pathes = check_destination(pathes, token=setting_id, ext='txt') ## confirm message confirm_msg = [ ('[opt]\tfetch collection', color.render(co_feature_name, 'y'), '(ok)' if co_feature_existed else '(none)'), ('[opt]\tdestination', color.render(pathes['_root_'], 'y') ), ('[opt]\tverbose', config.verbose ), ('[opt]\toverwrite', config.overwrite) ] config.print_confirm(confirm_msg, bar=40, halt=True) # -- run -- ## generate svm vectors print >> sys.stderr, 'generating ','fused' if fusion_mode and fusion_id else '', ' vectors...', sys.stderr.flush() vectors = generate_vectors() print >> sys.stderr, 'done.' ## generate test and train files print >> sys.stderr, 'generate test train files...' generate_test_train_files(vectors, new_pathes) sys.stderr.flush() print >> sys.stderr, 'done.' if fusion_mode:
co_patscore = db[ config.co_patscore_name ] co_docscore = db[ config.co_docscore_name ] # check if the index(es) are fully-functional good_index = check_indexes(target=co_patscore, indexes=['pattern'], auto=False) if not good_index: exit(-1) ## confirm message confirm_msg = [ (config.ps_function_name, config.ps_function_type), (config.ds_function_name, config.ds_function_type), (config.sig_function_name, config.sig_function_type), (config.smoothing_name, config.smoothing_type), (config.limit_name, config.min_count), ('fetch collection', config.co_patscore_name, '(existed)' if co_patscore_existed else '(none)'), ('insert collection', config.co_docscore_name, '(existed)' if co_docscore_existed else '(none)'), ('verbose', config.verbose), ('overwrite', config.overwrite, { True: color.render('!Note: This will drop the collection [ '+config.co_docscore_name+' ]' if co_docscore_existed else '', 'red'), False: '' } ) ] config.print_confirm(confirm_msg, bar=40, halt=True) ## run import time s = time.time() update_all_document_scores() print 'Time total:',time.time() - s,'sec'
'(existed)' if co_docscore_existed else '(none)'), ('insert collection', config.co_results_name, '(existed)' if mdoc_results_existed else '(none)'), ('verbose', config.verbose), ('overwrite', config.overwrite, { True: color.render( '!Note: This will drop the collection [ ' + config.co_docscore_name + ' ]' if co_docscore_existed else '', 'red'), False: '' }) ] config.print_confirm(confirm_msg, bar=40, halt=True if not skip_eval else False) if skip_eval: ## (warning) destination's already existed print >> sys.stderr, '(warning) destination mongo doc', color.render( config.co_results_name + ' > ' + cfg, 'red'), 'is already existed' print >> sys.stderr, '\t use -o or --overwrite to force update' if not skip_eval: evals() if skip_eval: average()