def create_tfidf_feat_idea_general(training_map, feature_map_character, constant, easy_domain_flag, args):
    """
    ラベルごとの重要語を取り出す
    TFIDFスコアを文書集合から算出した後,ラベル文書ごとに閾値(足切り値)を求め,閾値以下の語は素性を作らない
    これで,「あるラベルに特徴的な語」を示す素性が作れた.と思う

    ラベルごとで素性選択を行う.
    疑似文書(ラベル)の作成→ラベル文書ごとにTFIDF計算→ラベルごとに閾値計算→
    閾値を設定する→素性選択
    """
    import math;
    tfidf_type=args.tfidf_type;
    L2_flag=True;
    num_of_discarded_feat=0;
    #map in which word scores are stored
    word_score_map={};
    #------------------------------------------------------------
    print 'TFIDF(Idea-2,3,4) score calculating'
    print 'L2 flag:{}'.format(L2_flag)
    #------------------------------------------------------------
    #Build a pseudo-document for each label from the training corpus
    #The TF-IDF computation expects input of the form: list documents [ list document [ unicode token ] ]
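    #e.g. [ [u'w1', u'w2', ...], [u'w3', ...], ... ]  (one inner list per label document)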
    if tfidf_type=='normal':
        print 'use tfidf(normal)'
        all_training_instances=[];
        tmp_document_controll_map={};
        for label in training_map:
            label_document=[t for t in training_map[label]];
            if label in tmp_document_controll_map:
                tmp_document_controll_map[label]+=label_document;
            else:
                tmp_document_controll_map[label]=label_document
     
        for k, v in sorted(tmp_document_controll_map.items()):
            all_training_instances.append(tmp_document_controll_map[k]);
        w_dt_maps_list=tf_idf.tf_idf_interface(all_training_instances);
    #------------------------------------------------------------
    elif args.tfidf_type=='nishimura':
        #The TF-IDF computation expects input of the form: list documents [ list document [ list sub-document [ unicode token ] ] ]
        #The IDF computation needs list documents [ unicode token ], but the function converts the input internally
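        #e.g. [ [ [u'w1', u'w2'], [u'w3'] ], ... ]  (one middle list per label, one inner list per sub-document)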
        print 'use tfidf(nishimura)'
        all_training_instances=[];
        tmp_document_controll_map={};
        for subdata in training_map:
            for k, v in sorted(training_map[subdata].items()):
                if k in tmp_document_controll_map:
                    tmp_document_controll_map[k]+=training_map[subdata][k];
                else:
                    tmp_document_controll_map[k]=training_map[subdata][k];
        for k, v in sorted(tmp_document_controll_map.items()):         
            all_training_instances.append(tmp_document_controll_map[k]);
        w_dt_maps_list=tf_idf.tf_idf_nishimura_interface(all_training_instances);
        all_training_instances=[document for sub_document in all_training_instances for document in sub_document]
    #------------------------------------------------------------
    #Sum the weight scores over all documents
    for document_index,w_dt_map in enumerate(w_dt_maps_list):
        for t in w_dt_map:
            if t not in word_score_map:
                word_score_map[t]=w_dt_map[t];
            else:
                word_score_map[t]+=w_dt_map[t];
    #List of alphabet letters used as label names
    alphabet_list=[chr(i) for i in range(65,65+26)];
    alphabet_list.remove('I');
    alphabet_list.remove('O');
    alphabet_list.remove('Y');
    #Apply L2 normalization to the weight scores over all documents and compute the thresholds
    if L2_flag==True:
        #map that stores the threshold for each label
        #map threshold_point_map {unicode label: float threshold_point}
        threshold_point_map={};
        #Compute the L2 norm over all word scores
        weight_sum=0;    
        for key in word_score_map:
            weight=word_score_map[key];
            weight_sum+=(weight)**2;
        L2_norm=math.sqrt(weight_sum);
        #map that stores the L2-normalized word weights per label
        #map L2_normalized_map {unicode label: map L2_normalized_map_per_label {unicode token: float weight}}
        L2_normalized_map={};
        
        for label_index,w_dt_map in enumerate(w_dt_maps_list):
            alphabet_label=alphabet_list[label_index];
            L2_normalized_map_per_label={};
            threshold_point_in_label=0;
            for token in w_dt_map:
                L2_normalized_weight=w_dt_map[token]/L2_norm;
                if constant==True:
                    L2_normalized_map_per_label[token]=L2_normalized_weight;        
                else:
                    L2_normalized_map_per_label[token]=1;        
                #Accumulate the sum of the L2-normalized weights within this label
                threshold_point_in_label+=L2_normalized_weight;
            #Threshold for this label: the mean of its L2-normalized weights
            threshold_point_map[alphabet_label]=threshold_point_in_label/len(w_dt_map);
            #Store the (L2-normalized) word weights for this label
            L2_normalized_map[alphabet_label]=L2_normalized_map_per_label;

        for alphabet_label in L2_normalized_map:
            threshold=threshold_point_map[alphabet_label];
            for t in L2_normalized_map[alphabet_label]:
                #Cut off: the score is below the threshold, so no feature is created
                if L2_normalized_map[alphabet_label][t] < threshold:
                    num_of_discarded_feat+=1;
                else:
                    if constant==True:
                        weight_format=u'{}_{}_{}'.format(alphabet_label, t, L2_normalized_map[alphabet_label][t]);
                    else:
                        #The feature vector is not scaled by a constant, so store 1: i.e. a binary feature
                        weight_format=u'{}_{}_{}'.format('normal', t, 1);
                    if t not in feature_map_character:
                        feature_map_character[t]=[weight_format];
                    elif weight_format not in feature_map_character[t]:
                        feature_map_character[t].append(weight_format);
    #Case where no threshold cut-off is applied: register every word as a feature (no per-label map is built, so an empty one is returned)
    else:
        L2_normalized_map={};
        for t in word_score_map:
            weight_format=u'{}_{}_{}'.format('normal', t, word_score_map[t]);
            if t not in feature_map_character:
                feature_map_character[t]=[weight_format];
            elif weight_format not in feature_map_character[t]:
                feature_map_character[t].append(weight_format);

    print 'The number of discarded features:{}'.format(num_of_discarded_feat);

    return feature_map_character, L2_normalized_map;
def create_tfidf_feat_idea1(training_map, feature_map_character, args):
    """
    TFIDFにて素性選択をするidea-1
    文書集合全体で素性選択を行う.
    疑似文書(ラベル)の作成→ラベル文書ごとにTFIDF計算→全文書でのスコアを足す→全単語のスコアを足す→
    閾値を設定する→素性選択
    """
    import math;
    stop=args.stop;
    tfidf_type=args.tfidf_type;
    L2_flag=True;
    num_of_discarded_feat=0;
    #Should words from the Persian corpus also be included in the TF-IDF space?
    persian_flag=False;    
    #map in which word scores are stored
    word_score_map={};
    #------------------------------------------------------------
    print 'TFIDF(Idea-1) score calculating'
    print 'L2 flag:{} Persian flag:{}'.format(L2_flag, persian_flag);
    #------------------------------------------------------------
    #Build a pseudo-document for each label from the training corpus
    #The TF-IDF computation expects input of the form: list documents [ list document [ unicode token ] ]
    if tfidf_type=='normal':
        print 'use tfidf(normal)'
        all_training_instances=[];
        tmp_document_controll_map={};
        for subdata in training_map:
            for k, v in sorted(training_map[subdata].items()):
                label_document=[t for doc in v for t in doc];
                if k in tmp_document_controll_map:
                    tmp_document_controll_map[k]+=label_document;
                else:
                    tmp_document_controll_map[k]=label_document
     
        for k, v in sorted(tmp_document_controll_map.items()):
            all_training_instances.append(tmp_document_controll_map[k]);
        w_dt_maps_list=tf_idf.tf_idf_interface(all_training_instances);
    #------------------------------------------------------------
    elif args.tfidf_type=='nishimura':
        #The TF-IDF computation expects input of the form: list documents [ list document [ list sub-document [ unicode token ] ] ]
        #The IDF computation needs list documents [ unicode token ], but the function converts the input internally
        print 'use tfidf(nishimura)'
        all_training_instances=[];
        tmp_document_controll_map={};
        for subdata in training_map:
            for k, v in sorted(training_map[subdata].items()):
                if k in tmp_document_controll_map:
                    tmp_document_controll_map[k]+=training_map[subdata][k];
                else:
                    tmp_document_controll_map[k]=training_map[subdata][k];
        for k, v in sorted(tmp_document_controll_map.items()):         
            all_training_instances.append(tmp_document_controll_map[k]);
        w_dt_maps_list=tf_idf.tf_idf_nishimura_interface(all_training_instances);
        all_training_instances=[document for sub_document in all_training_instances for document in sub_document]
    #------------------------------------------------------------
    #Sum the weight scores over all documents
    for document_index,w_dt_map in enumerate(w_dt_maps_list):
        for t in w_dt_map:
            if t not in word_score_map:
                word_score_map[t]=w_dt_map[t];
            else:
                word_score_map[t]+=w_dt_map[t];
    #Apply L2 normalization to the weight scores over all documents and compute the threshold
    if L2_flag==True:
        #Compute the L2 norm over all word scores
        weight_sum=0;    
        for key in word_score_map:
            weight=word_score_map[key];
            weight_sum+=(weight)**2;
        L2_norm=math.sqrt(weight_sum);
        L2_normalized_map={};    
        L2_weightsum=0;
        for key in word_score_map:
            normalized_score=word_score_map[key]/L2_norm;
            L2_normalized_map[key]=normalized_score;        
            #Accumulate the sum of the L2-normalized weights
            L2_weightsum+=normalized_score;
        #Compute the cut-off score: the mean of the L2-normalized weights
        L2_average=L2_weightsum/len(L2_normalized_map);
        for doc in all_training_instances:
            for t in doc:
                if t in L2_normalized_map:
                    if L2_normalized_map[t] < L2_average:
                        #Cut off: the score is below the threshold, so no feature is created
                        num_of_discarded_feat+=1;
                    else:
                        weight_format=u'{}_{}_{}'.format('normal', t, L2_normalized_map[t]);
                        if t not in feature_map_character:
                            feature_map_character[t]=[weight_format];
                        elif weight_format not in feature_map_character[t]:
                            feature_map_character[t].append(weight_format);
    #Case where no threshold cut-off is applied: register every word as a feature
    else:
        for t in word_score_map:
            weight_format=u'{}_{}_{}'.format('normal', t, word_score_map[t]);
            if t not in feature_map_character:
                feature_map_character[t]=[weight_format];
            elif weight_format not in feature_map_character[t]:
                feature_map_character[t].append(weight_format);

    print 'The number of discarded features:{}'.format(num_of_discarded_feat);
    #Temporary measure: the variable name changed when this code was rewritten
    tfidf_score_map=word_score_map;

    return feature_map_character, tfidf_score_map;
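def _sketch_l2_mean_threshold(word_score_map):
    """
    Illustrative sketch only (hypothetical helper, not called anywhere in this module):
    it shows the L2-normalize-then-mean-threshold cut-off that the two functions above
    rely on, in isolation from the corpus handling.
    Given a map {unicode token: float tfidf_score}, the scores are divided by their
    L2 norm and only the tokens whose normalized score is at least the mean are kept.

    >>> _sketch_l2_mean_threshold({u'a': 3.0, u'b': 4.0, u'c': 0.1})
    [u'a', u'b']
    """
    import math;
    #L2 norm of all the scores
    L2_norm=math.sqrt(sum(score**2 for score in word_score_map.values()));
    #L2-normalized scores
    normalized_map=dict((t, score/L2_norm) for t, score in word_score_map.items());
    #Cut-off: the mean of the L2-normalized scores
    threshold=sum(normalized_map.values())/len(normalized_map);
    #Keep only the tokens at or above the cut-off
    return sorted(t for t in normalized_map if normalized_map[t] >= threshold);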