def compare_coupons_visit_list(self):

        os.chdir(self.train_data_dir)

        # Get userid / couponid info from valid coupon visit file
        # valid_coupon_visit_train.csv was created from coupon_visit_train, has only coupon information
        # for coupons in coupon_list_train.csv. All other coupons are in file rogue_coupon_visit_train.csv
        f = open('valid_coupon_visit_train.csv', 'r')

        numlines = sf.file_len(f.name)
        linenum = 0
        temp_array_userid = np.empty(numlines, dtype=object)
        temp_array_couponid = np.empty(numlines, dtype=object)

        for line in f:
            temp_array_userid[linenum] = line.split(',')[5]
            temp_array_couponid[linenum] = line.split(',')[4]
            linenum += 1

        array_userid = np.unique(temp_array_userid, return_index=False)
        array_couponid = np.unique(temp_array_couponid, return_index=False)

        logger.debug(array_userid.shape)
        logger.debug(array_couponid.shape)

        f.close()

        # Get coupon info from coupon list file. These are the valid coupons for the train data set
        f = open('coupon_list_train.csv', 'r')
        #  f = open('/home/harsha/kaggle/ponpare/data/user_list.csv', 'r')

        numlines = sf.file_len(f.name)
        linenum = 0
        temp_array_couponid = np.empty(numlines, dtype=object)

        for line in f:
            temp_array_couponid[linenum] = line.split(',')[23]
            linenum += 1

        array_couponid_list = np.unique(temp_array_couponid, return_index=False)

        logger.debug(array_couponid_list.shape)

        # Compare coupon id between visit and list files. symmetric difference shows elements
        # present in one set but not the other.
        array_couponid_common = tuple(set(array_couponid).symmetric_difference(array_couponid_list))

        logger.debug(array_couponid_common)

        logger.debug(len(array_couponid_common))

        #  for i in range(0,len(array_couponid_common)):
        #      print array_couponid_common[i]

        f.close()

        os.chdir(self.cwd)
    def purchase_stats(self):

        os.chdir(self.data_dir)

        f = open('user_list.csv', 'r')

        numlines = sf.file_len(f.name)
        linenum = 0

        logger.debug("user_list.csv %d lines", numlines)

        # Create 2D array that can store number of purchases per userid
        array_userid_purchase = np.zeros((numlines-1, 2), dtype=object)

        for line in f:
            if linenum:  # Ignore first line as it's the title
                array_userid_purchase[linenum-1][0] = line.split(',')[5]
            else:
                print line.split(',')[5]
            linenum += 1

        logger.debug("array_userid_purchase size is %s\n", array_userid_purchase.shape)

        f.close()

        os.chdir(self.train_data_dir)

        # Get coupon purchase info from coupon detail file. Calculate # coupons purchased vs. user
        f = open('coupon_detail_train.csv', 'r')

        datavalid = 0

        for line in f:
            if datavalid:  # Ignore first line as it's the title

                # Find index for each userid in the array populated from user_list.csv
                index_row, index_col = np.where(array_userid_purchase == line.split(',')[4])

                # Sum of total coupons purchased for each user
                array_userid_purchase[index_row, index_col + 1] += int(line.split(',')[0])

            datavalid = 1

        f.close()

        np.save('purchase_total_by_user', array_userid_purchase)

        np.savetxt('purchase_total_by_user.log', array_userid_purchase, fmt='%s')

        os.chdir(self.cwd)

        return __pass__
    def plotdata(self):
        os.chdir(self.data_dir)

        fname = "user_list_mod.csv"

        nummembershipdays = np.zeros(sf.file_len(fname), int)
        linenum = 0

        # Get number of membership days data from input file
        f = open(fname, 'r')

        for line in f:
            nummembershipdays[linenum] = int(line.split(',')[4])
            linenum += 1

        f.close()