def refreshCoeffTableAndFollowing(self):
        '''
        Reload the coefficient table and the following lists if
        self.train_fname differs from the training file loaded last time.

        Reads:
          - <coefficient_dir>/<train_fname>_coeff.json, a JSON object with
            the keys 'pws' and 'pwfs', which are handed to CoeffTable.
          - <following_dir>/<train_fname>, one user per line in the form
            "user_id<TAB>id id id ...".

        Sets self.coeff_table, self.followings and self.prev_train_fname.
        Nothing is modified when the cached data is still valid, or when
        loading either file fails (the exception propagates to the caller).
        '''
        if self.train_fname == self.prev_train_fname:
            return      # cached data still matches the training file

        # read an appropriate coefficient file
        coeff_fname = os.path.join(self.coefficient_dir,
                self.train_fname + '_coeff.json')
        with open(coeff_fname) as coeff_file:
            tables = json.load(coeff_file)
        pw = tables['pws']
        pwf = tables['pwfs']
        coeff_table = CoeffTable(tables = (pw, pwf))

        # read an appropriate following file
        f_fname = os.path.join(self.following_dir, self.train_fname)
        followings = {}     # user_id -> list of following ids
        with open(f_fname) as following_file:
            for line in following_file:
                user_id, f_list = line.rstrip('\n').split('\t')
                followings[user_id] = f_list.split(' ')

        # Commit everything only after both files were read successfully, so
        # a failure above cannot leave the cache marked as fresh while the
        # data is stale or missing.
        self.coeff_table = coeff_table
        self.followings = followings
        self.prev_train_fname = self.train_fname
import yaml
import json
from passWeightCoeff import CoeffTable


# Small driver: load one precomputed coefficient file, build a CoeffTable
# from its 'pws' / 'pwfs' entries and run its lookUp().
fin_name = '../data/semi/coefficient/train0_coeff.json'
with open(fin_name) as fin:     # close the handle as soon as it is parsed
    tables = json.load(fin)
pw = tables['pws']
pwf = tables['pwfs']

# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3.
print('loaded')

coeff = CoeffTable(tables = (pw, pwf))

print('start cal')

coeff.lookUp()
class ReweightingTextFeatureConverter(TextFeatureConverter):
    '''
    Text feature converter that rescales the word weights of users who are
    not in the hard-label list, using precomputed p(w) and p(w|f) tables.

    The attributes that should be set by the caller:

    1. train_fname (instance variable)
    2. hard_label_fname (class variable)
    3. coefficient_dir (class variable)
    4. following_dir, the following data dir (class variable)
    '''

    def __init__(self, getText = None, stopfile = None):
        '''
        Need to read in hard-label list, so we pass those users later. Also we
        need to read-in the coefficient file (done lazily, see
        refreshCoeffTableAndFollowing).

        getText and stopfile are forwarded unchanged to the superclass
        constructor. The class variables hard_label_fname, coefficient_dir
        and following_dir must already be set when this runs.
        '''
        super(ReweightingTextFeatureConverter,
                self).__init__(getText, stopfile)    # if any

        self.hard_label_fname = ReweightingTextFeatureConverter.hard_label_fname
        self.coefficient_dir = ReweightingTextFeatureConverter.coefficient_dir
        self.following_dir = ReweightingTextFeatureConverter.following_dir

        # this is used to cache things: name of the training file whose
        # coefficient table / following lists are currently loaded
        self.prev_train_fname = None

        # read in all the hard labeled users; the user id is the first
        # tab-separated column of every line
        with open(self.hard_label_fname) as hard_label_file:
            self.hard_label_list = [line.split('\t')[0]
                                    for line in hard_label_file]


    def refreshCoeffTableAndFollowing(self):
        '''
        Reload the coefficient table and the following lists if
        self.train_fname differs from the training file loaded last time.

        Reads:
          - <coefficient_dir>/<train_fname>_coeff.json, a JSON object with
            the keys 'pws' and 'pwfs', which are handed to CoeffTable.
          - <following_dir>/<train_fname>, one user per line in the form
            "user_id<TAB>id id id ...".

        Sets self.coeff_table, self.followings and self.prev_train_fname.
        Nothing is modified when the cached data is still valid, or when
        loading either file fails (the exception propagates to the caller).
        '''
        if self.train_fname == self.prev_train_fname:
            return      # cached data still matches the training file

        # read an appropriate coefficient file
        coeff_fname = os.path.join(self.coefficient_dir,
                self.train_fname + '_coeff.json')
        with open(coeff_fname) as coeff_file:
            tables = json.load(coeff_file)
        pw = tables['pws']
        pwf = tables['pwfs']
        coeff_table = CoeffTable(tables = (pw, pwf))

        # read an appropriate following file
        f_fname = os.path.join(self.following_dir, self.train_fname)
        followings = {}     # user_id -> list of following ids
        with open(f_fname) as following_file:
            for line in following_file:
                user_id, f_list = line.rstrip('\n').split('\t')
                followings[user_id] = f_list.split(' ')

        # Commit everything only after both files were read successfully, so
        # a failure above cannot leave the cache marked as fresh while the
        # data is stale or missing.
        self.coeff_table = coeff_table
        self.followings = followings
        self.prev_train_fname = self.train_fname


    def additionalPass(self, words_arr, user_id, semi_label):
        '''
        Override superclass method to add another pass. This pass will
        adjust the feature weight, based on the precomputed p(w) and p(w|f)
        values for each class in the observed clean dataset.

        First, we should be able to distinguish if user_id is weakly labeled
        data, or hard labeled data which can be done easily by reading
        hard-label file. *The caller of this class is responsible to set the
        proper training file name*, so this method can know which file to read to
        calculate the coefficient.

        Arguments:
          words_arr  -- iterable of (word, weight) pairs
          user_id    -- id of the user the words belong to
          semi_label -- label of the user; converted with int() to select
                        the p(w) and p(w|f) tables

        Returns words_arr itself when user_id is hard labeled; otherwise a
        new list of (word, weight) tuples with the adjusted weights.

        Raises KeyError if a weakly labeled user_id has no entry in the
        following file.
        '''
        if user_id in self.hard_label_list:     # if clean data
            return words_arr                    # do nothing.

        # refresh coeff table and following list if needed
        self.refreshCoeffTableAndFollowing()

        # pre-load some common variables
        following = self.followings[user_id]
        pw_table = self.coeff_table.pwTable(int(semi_label))
        pwf_table = self.coeff_table.pwfTable(int(semi_label))

        # Now start to process each word in words_arr
        def _reweight(word_weight):
            word, weight = word_weight

            # load numerator, i.e., p(w); words without an entry keep
            # their weight
            if word not in pw_table:
                return (word, weight)

            pw = pw_table[word]        # numerator, or p(w)

            # load denominator, i.e., p(w|f). This part is more involved since
            # we should calculate the noisy-or.

            # for any entry available in the pwf_table, we combine the
            # coefficient.
            # NOTE(review): a table entry with pw == 1 would divide by zero
            # below; presumably the tables never contain one -- confirm.
            mul_1_q = 1             # PI_i {1 - q_i}
            for f in following:
                try:
                    value = pwf_table[f][word]         # p(w|f)
                except KeyError:
                    # no p(w|f) for this (following, word) pair
                    continue
                mul_1_q *= (1 - value) / (1 - pw)

            # final constant
            if mul_1_q == 1:
                # nothing was combined: leave the weight as it is
                c_wf = 1
            else:
                # the coefficient is capped at 2.0
                c_wf = min(pw / (1 - (1 - pw) * mul_1_q), 2.0)

            return (word, c_wf * weight)

        # Build a real list: same result as map() on Python 2, and not a
        # one-shot iterator on Python 3.
        return [_reweight(word_weight) for word_weight in words_arr]