Exemple #1
0
def _find_glabels(asm):
    """Scan every .s file for lui/lower instruction pairs matched by
    GLABEL_REGEX, derive 'D_%08X' glabel names from their immediates, and
    rewrite both instructions to use %hi/%lo relocations in place.

    Parameters:
        asm: NOTE(review): unused — the function walks the filesystem for
            '.s' files instead of operating on this text; confirm whether
            callers expect the argument to be honored.

    Returns:
        A list of (glabel, lui ROM offset, lower-instruction ROM offset)
        tuples, excluding names listed in IGNORE_GLABELS.
    """
    glabels = []
    for file in FileUtil.get_filenames_from_directory_recursive('.', '.s'):
        contents = FileUtil.get_text_from_file(file)
        matches = GLABEL_REGEX.findall(contents)
        for match in matches:
            reg = match[3]
            offsets = (match[2], match[10]
                       )  # ROM offsets of the lui and [sl][bhw]
            glabel_upper = int(match[5], 16)  # upper immediate of glabel
            lower_group = 15 if match[
                15] else 18  # lower immediate of the glabel
            is_addu = lower_group == 18  # NOTE(review): unused variable
            glabel_lower = int(match[lower_group], 16)
            glabel_lower &= 0xFFFF
            # The lower half is sign-extended at runtime, so a set sign bit
            # means the assembler bumped the upper half by one; undo that to
            # recover the real base address.
            if glabel_lower & 0x8000:
                glabel_upper -= 1
            glabel = 'D_%08X' % (glabel_upper << 16 | glabel_lower)
            if glabel not in IGNORE_GLABELS:
                glabels.append((glabel, *offsets))
                # replace upper instruction
                contents = re.sub(UPPER_INSTR_REGEX_TMPL % offsets[0],
                                  r'\1%s, %%hi(%s) # \2' % (reg, glabel),
                                  contents)
                # replace lower instruction
                contents = re.sub(LOWER_INSTR_LS_REGEX_TMPL % offsets[1],
                                  r'\1%%lo(%s)\5' % glabel, contents)
                contents = re.sub(
                    LOWER_INSTR_ADDU_SAME_REGEX_TMPL % offsets[1],
                    r'\1addiu \3, %%lo(%s) # \2' % glabel, contents)
                contents = re.sub(LOWER_INSTR_ADDU_REGEX_TMPL % offsets[1],
                                  r'\1%%lo(%s)' % glabel, contents)
        if len(matches):
            FileUtil.write_text_to_file(file, contents)
    return glabels
Exemple #2
0
    def finish(self):
        """Finalize the exam: verify state, submit the code, and notify the
        hiring manager.

        Exits the process when not run from the exam root directory or when
        the interview was already finished on the server.
        """
        if not FileUtil.interview_exists():
            print('Please change to the root of the exam directory, then execute this command again.')
            exit(-1)

        # do not trust existing data, retrieve interview data from server again
        interview = FileUtil.read_interview('.')
        self.cerf_api = Cerf(interview['id'], interview['authcode'])

        interview = self.cerf_api.interview.retrieve(interview['id'])
        self.load_data(interview)

        # A non-empty 'time_spent' means the interview was already closed.
        if interview['time_spent']:
            print('Your exam is over. Please stay tuned.')
            exit(-1)

        spent = calc_time_spent(interview['started'])
        print('Thank you! Your exam is done! Total time spent: %d minutes.' % spent)

        print('Submitting your code to generate report...')
        self.submit_cases()
        print('Done!')

        # BUG FIX: dropped the stray trailing comma (a Python 2 `print x,`
        # leftover) that turned this statement into a useless 1-tuple.
        print('Notifying the hiring manager...')
        self.finish_interview()
        print('Done!')

        print('Please wait for a short moment. If no one comes in 5m, please inform frontdesk.')
Exemple #3
0
def _log_glabel_usage(glabels):
    """
    Returns:
        usage: A sorted map from glabel names to a sorted list of all the ROM
            addresses it is accessed from.
        c_file_offsets: A list of (filename, ROM offset) tuples from all the c
            files used, sorted by offset.
    Parameters:
        glabels: output from _get_glabels.
    """
    usage = OrderedDict([(glabel, set()) for glabel in glabels])
    files = FileUtil.get_filenames_from_directory_recursive('.', ('.c', '.s'))
    c_file_offsets = []
    for file in files:
        contents = FileUtil.get_text_from_file(file)
        try:
            offset = _get_file_offset(file, contents)
            if file.endswith('.c'):
                c_file_offsets.append((file, offset))
            matches = re.findall(GLABEL_REGEX, contents)
            for glabel in matches:
                if glabel in usage:
                    usage[glabel].add(offset)
        except Exception:
            # Best-effort: skip files whose offset cannot be determined.
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit / KeyboardInterrupt.
            pass
    for glabel in usage:
        # Replace each set with a deterministic, sorted list.
        usage[glabel] = sorted(usage[glabel])
    c_file_offsets.sort(key=lambda f: f[1])
    return usage, c_file_offsets
Exemple #4
0
def main():
    """Report undeclared data labels found in one hand-picked asm file."""
    # Work relative to the project root so the relative asm path resolves.
    FileUtil.set_working_dir_to_project_base()
    contents = FileUtil.get_text_from_file(
        'asm/non_matchings/unknown_005740/func_80005254.s')
    # NOTE(review): _find_glabels appears to walk the filesystem itself;
    # confirm it actually consumes the `contents` passed here.
    glabels = _find_glabels(contents)
    print('%d undeclared labels found:' % len(glabels))
    for glabel in glabels:
        print(glabel)
 def test_get_html(self):
     """Verify that get_html fetches the expected markup for a URL."""
     expected = FileUtil().get_file_contents("example.html")
     fetched = HtmlRequester().get_html("http://example.org")
     self.assertEqual(expected, fetched)
Exemple #6
0
 def test_get_links(self):
     """Verify that get_links extracts the expected links from test HTML."""
     util = FileUtil()
     expected = util.get_file_contents("links_test_data.txt")
     parser = HtmlParser()
     page = util.get_file_contents("html_test_data.html")
     self.assertEqual(expected, parser.get_links(page))
Exemple #7
0
 def test_get_web_pages(self):
     """Verify that get_web_pages filters URLs down to actual web pages."""
     util = FileUtil()
     expected = util.get_file_contents("web_pages_test_data.txt")
     parser = HtmlParser()
     urls = util.get_file_contents("same_hostname_urls_test_data.txt")
     self.assertEqual(expected, parser.get_web_pages(urls))
def find_and_rename(directory, paths):
    """Rename each path under `directory` whose name contains the
    module-level `oldSymbol`, substituting `newSymbol`; files that no
    longer exist are silently skipped."""
    for entry in paths:
        full_path = directory + '/' + entry
        if oldSymbol not in full_path:
            continue
        renamed = full_path.replace(oldSymbol, newSymbol)
        try:
            FileUtil.rename_file(full_path, renamed)
        except FileNotFoundError:
            continue
        print('Renamed "' + full_path + '" to "' + renamed + '"')
 def mock_get_html(self, url):
     """Stand-in for html_requester.get_html: serves the contents of
     html_test_data.html for the known test URL, and an empty string for
     anything else, so no test data has to be hosted online."""
     if url != "http://www.domain.com":
         return ""
     return FileUtil().get_file_contents("html_test_data.html")
Exemple #10
0
 def test_get_same_hostname_urls(self):
     """Verify that get_same_hostname_urls keeps only same-host links."""
     util = FileUtil()
     expected = util.get_file_contents("same_hostname_urls_test_data.txt")
     parser = HtmlParser()
     links = util.get_file_contents("links_test_data.txt")
     self.assertEqual(
         expected,
         parser.get_same_hostname_urls("http://www.domain.com/", links))
def load_data(path, dsetname=''):
    """Load matching CSV datasets found under `path`.

    Parameters:
        path: directory tree to scan for datasets.
        dsetname: when non-empty, only files whose basename equals it are
            loaded; when empty, every dataset is loaded in turn.

    Returns:
        The DataFrame of the last file loaded (each load also records its
        column names via write_meta), or None when nothing matched.
    """
    util = FileUtil(path)
    util.walk()
    # BUG FIX: `data` was unbound (NameError) when no file matched.
    data = None
    for key in util.datums:
        if dsetname:
            fname = util.datums[key].split('/')[-1]
            if fname != dsetname:
                continue
        print('Load ', key)
        data = pd.read_csv(util.datums[key],
                           index_col="record_id")
        write_meta(list(data.columns.values), key)
    return data
Exemple #12
0
def link_features(path):
    """Symlink every '*.features.csv' dataset under `path` into /pfs/out."""
    # Load the datasets dataframes
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        fname = util.datums[key].split('/')[-1]
        if '.features.csv' not in fname:
            continue
        infile = util.datums[key]
        outfile = '/pfs/out/' + fname
        try:
            os.symlink(infile, outfile)
        except OSError:
            # BUG FIX: was a bare `except:`; os.symlink failures (e.g. the
            # link already exists) raise OSError, so catch only that and
            # keep the best-effort behavior.
            print('Cannot create sim-link', infile, outfile)
Exemple #13
0
 def __init__(self, filename):
     """Load a ROM image from `filename`, fix its byte order if needed,
     and record its size and MD5.

     Side effect: when the byte order had to be fixed, the original file
     is deleted and the data re-saved under a '.z64' extension (assumes
     `filename` ends in a 4-character extension).
     """
     with open(filename, 'rb') as romFile:
         self.fixedRomEndianess = False
         self.bytearray = romFile.read()
         self.bytes = list(self.bytearray)
         self.size = len(self.bytes)
         # Presumably byte-swaps self.bytearray/self.bytes and sets
         # self.fixedRomEndianess — confirm against _test_endianness.
         self._test_endianness()
         self.md5 = hashlib.md5(self.bytearray).hexdigest()
     if self.fixedRomEndianess:
         FileUtil.delete_file(filename)
         # Save the ROM as big-endian
         with open(filename[:-4] + '.z64', 'wb') as romFile:
             romFile.write(self.bytearray)
def main():
    """Resolve the symbol<->address pairing named on the command line."""
    FileUtil.set_working_dir_to_project_base()
    if len(sys.argv) != 2:
        show_help()
        return
    query = sys.argv[1]
    symbol, address = find_pairing(query)
    if symbol is not None and address is not None:
        print('0x%08X = %s' % (address, symbol))
    elif is_address(query):
        print('No symbol was found for the address 0x%08X' % int(query, 16))
    else:
        print('No address was found for the symbol "%s"' % query)
Exemple #15
0
def main():
    """Print suggested file splits for the .data, .rodata and .bss sections."""
    FileUtil.set_working_dir_to_project_base()
    data_glabels, rodata_glabels, bss_glabels = _get_glabels()
    sections = [('.data', data_glabels), ('.rodata', rodata_glabels),
                ('.bss', bss_glabels)]
    for section_name, glabels in sections:
        usage, c_file_offsets = _log_glabel_usage(glabels)
        filtered_usage = _filter_glabel_usage(usage)
        file_splits = _split_glabel_files(filtered_usage, c_file_offsets)
        print('File splits for %s:' % section_name)
        for split in file_splits:
            print('%s (%06X): %s' % split)
        print()
 def test_crawl(self):
     """Verify crawl output against crawl_test_data.txt.

     get_html is replaced with mock_get_html so the test html data does
     not have to be hosted online.
     """
     expected = FileUtil().get_file_contents("crawl_test_data.txt")
     crawler = WebCrawler()
     # Bound method assigned directly — no need for a pass-through lambda.
     crawler.html_requester.get_html = self.mock_get_html
     self.assertEqual(expected, crawler.crawl("http://www.domain.com"))
Exemple #17
0
 def loadAllFromFiles() -> Dict[str, List[Dict[str, Any]]]:
   """Read every known set's card list from disk, keyed by set name."""
   loaded = {}
   for setName in SetUtil.sets:
     path = f"{SetUtil.CARDS_DIR}{os.sep}{sanitize(setName)}"
     loaded[setName] = FileUtil.getJSONContents(path)
   return loaded
def load_data(path, dsetname=''):
    """Scan `path` for files whose name contains `dsetname` and copy them.

    NOTE(review): this function looks broken as written — see the inline
    notes on `copyfile` and the final `return`.
    """
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        if (len(dsetname) > 0):
            fname = util.datums[key].split('/')[-1]
            if (dsetname in fname):
                print('Load ', key)
                #data = pd.read_csv(util.datums[key],
                #           header=None)
                #write_meta(list(data.columns.values),key)

                # NOTE(review): shutil.copyfile requires a destination
                # argument; this call raises TypeError at runtime — the
                # intended destination path is missing.
                copyfile(util.datums[key], )
            else:
                print(dsetname, key)
    # NOTE(review): `data` is only assigned in the commented-out code above,
    # so this return raises NameError when reached.
    return data
Exemple #19
0
def main():
  """Generate every guide listed in guides.json and report the results."""
  guides = FileUtil.getJSONContents('guides.json')
  if guides is None:  # BUG FIX: idiomatic identity check (was `== None`)
    exit(1)

  successes = []
  failures  = [] # Tag yourself

  for g in guides:
    if genGuide(guides[g]):
      successes.append(g)
    else:
      failures.append(g)
    print()

  if len(successes) > 0:
    # BUG FIX: corrected 'succssful' typo in the user-facing message.
    print('\n%d successful generation(s):' % len(successes))
    for s in successes:
      print('\t%s' % s)
  else:
    print('0 successful generations')
  print()

  if len(failures) > 0:
    print('%d failed generation(s):' % len(failures))
    for f in failures:
      print('\t%s' % f)
  else:
    print('0 failed generations')
  print()
def main():
    """Validate every guide listed in guides.json and report the results."""
    guides = FileUtil.getJSONContents('guides.json')
    if guides is None:  # BUG FIX: idiomatic identity check (was `== None`)
        exit(1)

    validGuides = []
    invalidGuides = []

    for g in guides:
        if validateGuide(guides[g]):
            validGuides.append(g)
        else:
            invalidGuides.append(g)
        print()

    if len(validGuides) > 0:
        print('\n%d valid guide(s):' % len(validGuides))
        for vg in validGuides:
            print('\t%s' % vg)
    else:
        print('0 valid guides')
    print()

    if len(invalidGuides) > 0:
        print('%d invalid guide(s):' % len(invalidGuides))
        for ivg in invalidGuides:
            print('\t%s' % ivg)
    else:
        print('0 invalid guides')
    print()
Exemple #21
0
 def submit_cases(self):
     """Walk the current directory and submit every 'case*' subdirectory."""
     path = os.getcwd()
     for root, dirs, files in os.walk('.'):
         for d in dirs:
             if d.startswith('case'):
                 # NOTE(review): joins against the cwd rather than `root`,
                 # so a nested 'case*' directory would resolve to the wrong
                 # path — presumably cases only live at the top level;
                 # confirm before relying on deeper nesting.
                 config = FileUtil.read_case(os.path.join(path, d))
                 self.submit_case(config)
Exemple #22
0
def __read_config():
    """Load the settings/account/db YAML configs from the package's
    'config' directory.

    Returns:
        (ENV, LOG_SWITCH, accounts, db, LOG_LEVEL, PRINT_SWITCH)
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Absolute path of the global configuration directory.
    config_dir = os.path.join(os.path.realpath(base_dir), "config")
    settings = FileUtil().connect_to(
        os.path.join(config_dir, "settings.yml")).parsed_data
    accounts = FileUtil().connect_to(
        os.path.join(config_dir, "account.yml")).parsed_data
    db = FileUtil().connect_to(os.path.join(config_dir, "db.yml")).parsed_data
    return (settings["env"], settings["log_switch"], accounts, db,
            settings["log_level"], settings["print_switch"])
Exemple #23
0
def validate_meta(path):
    """Check that every '*.meta.*' datum under `path` has identical rows.

    Returns the shared meta header row, or [] as soon as a mismatch is
    found (a message is printed for the first mismatch).
    """
    util = FileUtil(path)
    util.walk()
    meta = []
    for key in util.datums:
        for part in key.split('.'):
            if part != 'meta':
                continue
            with open(path + '/' + key, 'r') as f:
                reader = csv.reader(f)
                # The very first meta row seen becomes the reference.
                if not meta:
                    meta = next(reader)
                for row in reader:
                    if row != meta:
                        print('Meta data does not match', meta, row)
                        return []
    return meta
Exemple #24
0
def get_data(path):
    """Collect every CSV row from the datums under `path`, merge them, and
    write the result under a name built from the concatenated keys."""
    util = FileUtil(path)
    util.walk()
    rows = []
    name = ''
    for key in util.datums:
        print(key, util.datums[key])
        name += key
        with open(util.datums[key], 'r') as datum:
            rows.extend(csv.reader(datum))
    if rows:
        write(name, merge_data(rows))
    else:
        print('No datums collected')
Exemple #25
0
    def init():
        """
        Right now, this just gets the bearer token.

        Uses the token cached on disk when it remains valid for at least
        another 180 seconds; otherwise requests a fresh one and caches it.
        """

        # Try getting the cached bearer token
        # TODO Factor this out into an abstract base class
        tokenFP = f"{FileUtil.TOKENS_FOLDER_PATH}{os.sep}{TCGPlayerAPI.KEY_NAME}.json"
        currTime = int(time.time())
        try:
            tokenInfo = FileUtil.getJSONContents(tokenFP)['bearer_token']
            tokenValue = tokenInfo['value']
            expireTime = tokenInfo['expires']
            # BUG FIX: the comparison was inverted — the cached token was
            # only reused once it was within 180s of expiry (or already
            # expired). Reuse the cache only while it stays valid for at
            # least 180 more seconds.
            if (currTime + 180) < expireTime:
                TCGPlayerAPI.bearerToken = tokenValue
                haveToken = True
            else:
                haveToken = False
        except Exception:
            # Missing or malformed cache file — fall through to a fresh
            # token request.
            haveToken = False
            expireTime = currTime - 1

        if haveToken:
            print('Using cached bearer token for %s' % TCGPlayerAPI.KEY_NAME)
        else:
            print('Getting new bearer token for %s' % TCGPlayerAPI.KEY_NAME)
            r = requests.post(TCGPlayerAPI.GET_BEARER_TOKEN_URL,
                              data={
                                  'grant_type': 'client_credentials',
                                  'client_id': TCGPlayerAPI.keyInfo['public'],
                                  'client_secret':
                                  TCGPlayerAPI.keyInfo['private'],
                              })
            print(f"Request ({r.status_code}):\n{r.text}")
            respJSON = json.loads(r.text)
            # TODO: Make this not clobber everything else in the file
            # It's fine for now because there's nothing else, though
            FileUtil.writeJSONContents(
                tokenFP, {
                    'bearer_token': {
                        'value': respJSON['access_token'],
                        'expires': respJSON['expires_in'] + currTime
                    }
                })
            TCGPlayerAPI.bearerToken = respJSON['access_token']
Exemple #26
0
    def setUpClass(cls):
        """Create a KBase test workspace and upload the genome, reads and
        alignment fixtures shared by every test in this class."""
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_GenomeBrowser'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'kb_GenomeBrowser',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = kb_GenomeBrowser(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        # Millisecond timestamp suffix keeps concurrent runs from colliding
        # on the workspace name.
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeBrowser_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': wsName})  # noqa
        cls.wsName = wsName

        cls.file_util = FileUtil(wsName, cls.wsURL, cls.callback_url)

        # Upload genomes
        base_gbk_file = "data/at_chrom1_section.gbk"
        gbk_file = os.path.join(cls.scratch, os.path.basename(base_gbk_file))
        shutil.copy(base_gbk_file, gbk_file)
        cls.genome_ref = cls.file_util.load_genbank_file(gbk_file, 'my_test_genome')
        # get gff file
        cls.gff_file = cls.file_util.get_gff_file(cls.genome_ref)
        # get fasta file
        cls.fasta_file = cls.file_util.get_fasta_file(cls.genome_ref)

        # Upload reads
        base_reads_file = "data/extracted_WT_rep1.fastq"
        reads_file = os.path.join(cls.scratch, os.path.basename(base_reads_file))
        shutil.copy(base_reads_file, reads_file)
        cls.reads_ref = cls.file_util.load_reads_file("illumina", reads_file, None, "my_reads_lib")

        # Upload alignments
        base_align_file = "data/at_chr1_wt_rep1_hisat2.bam"
        cls.bam_file = os.path.join(cls.scratch, os.path.basename(base_align_file))
        shutil.copy(base_align_file, cls.bam_file)
        cls.alignment_ref = cls.file_util.load_bam_file(cls.bam_file, cls.genome_ref, cls.reads_ref, 'my_hisat2_alignment')
Exemple #27
0
 def process_image_file(config, file):
     """Convert the colors of one image file per `config` and write it out.

     Returns True on success, False on any read/convert/write failure.
     """
     image = cv2.imread(file)
     if image is None:
         # BUG FIX: the '%' operator was applied to print()'s return value
         # (None), raising TypeError; format the message before printing.
         print("读取图片:%s失败!" % file)
         return False
     convertor = ColorConvertorFactory.create_color_convertor(config.method)
     if convertor is None:
         return False
     output_image = convertor.convert(image, config)
     if output_image is None:
         return False
     output_path = ImagesColorProcessor.get_output_path(
         file, config.input_dir, config.output_dir)
     if config.overwrite and FileUtil.exists(output_path):
         FileUtil.delete(output_path)
     if not cv2.imwrite(output_path, output_image):
         # BUG FIX: same misplaced '%' as above.
         print("输出图片到:%s失败!" % output_path)
         return False
     return True
def load_processed(path, dsetname, columns, dtypes):
    """Load a processed CSV named `dsetname` from under `path`.

    Parameters:
        path: directory tree to scan.
        dsetname: when non-empty, only files with this exact basename load.
        columns: column names to assign after dropping the second raw column.
        dtypes: mapping of column name -> dtype to cast.

    Returns:
        The last matching DataFrame, indexed by 'rec_id', or None when no
        file matched.
    """
    util = FileUtil(path)
    util.walk()
    # BUG FIX: `data` was unbound (NameError) when nothing matched.
    data = None
    for key in util.datums:
        if dsetname:
            fname = util.datums[key].split('/')[-1]
            if fname != dsetname:
                continue
        print('Load ', key)
        data = pd.read_csv(util.datums[key],
                           header=None)
        # Drop the unused second column of the raw dump.
        data = data.drop(data.columns[1], axis=1)
        data.columns = columns
        # BUG FIX: this loop reused `key`, shadowing the outer loop
        # variable; use a distinct name.
        for col in dtypes:
            data[col] = data[col].astype(dtypes[col])

        data = data.set_index('rec_id')

    return data
Exemple #29
0
    def __init__(self, charInfoPath):
        """
        Load character info and all gear/trinket data, compute per-piece DPS
        for the character's current gear, and total its stats.

        Slots (currently - adding weapons and trinkets later):
        'Back', 'Belt', 'Bracer', 'Chest', 'Feet',
        'Gloves', 'Head', 'Legs', 'Neck', 'Ring', 'Shoulder'
        """

        paths = {'allGear': 'AllGear.json', 'trinkets': 'Trinkets.json'}

        self.charInfo = FileUtil.getJSONContents(charInfoPath)

        allGearList = FileUtil.getJSONContents(paths['allGear'])
        allTrinketsList = FileUtil.getJSONContents(paths['trinkets'])

        # They don't explicitly say that they're trinkets
        for trink in allTrinketsList:
            trink['Slot'] = 'Trinket'

        # Combine all gear into one list
        self.allGear = []
        self.allGear.extend(allGearList)
        self.allGear.extend(allTrinketsList)

        # Then turn that list into a map from name to the piece of gear
        self.allGear = DataUtil.toMap(self.allGear, 'Name')

        # Load the current gear into memory
        self.currentGear = DataUtil.statifyNamedGear(
            self.charInfo['Current Gear'], self.allGear)

        # TODO
        # SEE IF THIS DOESN'T BREAK THINGS LATER ON IN EXECUTION
        # (Might not be kosher if slotified this early)
        self.allGear = CalcUtil.slotifyAllGear(self.allGear)

        # Calculate each piece's DPS
        for name in self.currentGear:
            piece = self.currentGear[name]
            piece['DPS'] = CalcUtil.calcDPS(piece, self.charInfo)

        # Get some basic overall stats about the current gear
        self.totalStats = CalcUtil.getTotalStats(self.currentGear,
                                                 Globals.allStats)
Exemple #30
0
def _get_glabels():
    """
    Returns all the glabel definitions in the data file, split into .data,
    .rodata, and .bss.
    """
    text = FileUtil.get_text_from_file(DATA_FILE_PATH)
    found = re.findall(GLABEL_DEF_REGEX, text)
    glabels = [name for name in found if name not in IGNORE_GLABELS]
    # Section boundaries are marked by well-known sentinel labels.
    rodata_start = glabels.index(RODATA_START)
    bss_start = glabels.index(BSS_START)
    data_part = glabels[:rodata_start]
    rodata_part = glabels[rodata_start:bss_start]
    bss_part = glabels[bss_start:]
    return data_part, rodata_part, bss_part
Exemple #31
0
def main():
    """Collect each path's latest SVN revision into a dotted version string,
    write and commit version.txt, then copy and commit it into the server
    version directory."""
    if not dataPath or (not msgPath):
        return

    svn = pysvn.Client()

    # Dotted version string: one revision number per tracked path.
    revisions = [str(svn.log(tPath, limit=1)[0].revision.number)
                 for tPath in allPath]
    versionStr = ".".join(revisions)

    exportPath1 = op.join(op.dirname(dataPath), "version")
    exportPath = op.join(exportPath1, 'version.txt')

    # BUG FIX: use a context manager so the file handle is closed even when
    # the write raises.
    with open(exportPath, 'w') as f:
        f.write(versionStr)

    SvnCmd(path=exportPath,
           cmd='commit',
           logmsg='Commit version text.{}||{}'.format(exportPath1,
                                                      SERV_VERSION_DIR)).Run()

    # Copy the version files over to trunk.
    SvnCmd(path=SERV_VERSION_DIR, cmd='update',
           logmsg='update version files.').Run()

    update_files_version, add_files_version = FileUtil.copy(
        exportPath1, SERV_VERSION_DIR, suffixes=('.txt', ), force=True)

    if add_files_version:
        print("added:")
        prettyOutput(add_files_version)
        for new_file in add_files_version:
            dst_file = op.join(SERV_VERSION_DIR, op.basename(new_file))
            SvnCmd(path=dst_file,
                   cmd='add',
                   logmsg='add new config data files').Run()

    SvnCmd(path=SERV_VERSION_DIR, cmd='commit',
           logmsg='commit version files').Run()
Exemple #32
0
def calculate_matches():
    """Parse the linker map file into `mapFile` ({label: value/length}) and
    set RAM_TO_ROM from the '__RAM_TO_ROM' symbol."""
    global mapFile, RAM_TO_ROM
    # Matches '0x' + 16 hex digits + a label; group 1 is the low 8 digits.
    REGEX_MAP_GET_LABEL = r"[ ]*?0x[0-9A-Fa-f]{8}([0-9A-Fa-f]{8})[ ]*?([_A-Za-z0-9]+)"
    mapText = FileUtil.get_text_from_file(MAP_FILEPATH)
    mapMatches = getMatches(mapText, REGEX_MAP_GET_LABEL)
    # The last entry is only used as the end boundary of the one before it.
    for i in range(0, len(mapMatches) - 1):
        match = mapMatches[i]
        # NOTE(review): `labelValue` is indexed with [0] and [1] below,
        # which assumes getMatches returns a nested (address, name) pair in
        # match[1]; verify against getMatches — a flat re tuple would make
        # these single-character slices.
        labelValue = match[1]
        value = int(labelValue[0], 16)
        # Symbol length = next symbol's address minus this one's.
        length = int(mapMatches[i + 1][1][0], 16) - value
        mapFile[labelValue[1]] = {"value": value, "length": length}
    RAM_TO_ROM = mapFile['__RAM_TO_ROM']['value']
Exemple #33
0
    def submit_case(self, case):
        """Concatenate one case's valid source files and upload the answer.

        Parameters:
            case: dict with 'position', 'extentions' (comma-separated list
                of file extensions) and 'cid' keys.
        """
        path = os.path.join(os.getcwd(), 'case%s' % case['position'])
        # BUG FIX: dropped the stray trailing comma (a Python 2 `print x,`
        # leftover) that turned this statement into a useless 1-tuple.
        print('\tSubmit case%s...' % case['position'])
        extentions = [ext.strip() for ext in case['extentions'].split(',')]
        first_list, second_list = FileUtil.get_valid_files(path, extentions)
        content = ''
        for name in first_list + second_list:
            # Prefix each file's content with a banner naming the file.
            s = '/* %s */\n\n%s' % (name, FileUtil.read_content(os.path.join(path, name)))
            content += s

        data = {
            'interview': self.id,
            'applicant': self.interview['applicant_id'],
            'case': case['cid'],
            'content': content
        }

        if not self.cerf_api.answer.create(data):
            print('Cannot submit case%s, please contact your hiring manager.' % case['position'])
            # do not bail out so that we could try the latter cases.
            # exit(-1)
        else:
            print('Done!')
	def __init__(self, old_file, new_file, filename=""):
		"""Store the working file paths; when `filename` is given, also
		build a FileUtil over it and parse its structure."""
		if filename:
			self.fu = FileUtil(filename)
			self.fu.get_structure()
		self.old_file = old_file
		self.new_file = new_file
class Feature:
	"""Incrementally builds a tab-separated review-feature file.

	Each save_fN reads the file holding features 1..N-1 (old_file + '<N-1>'),
	appends feature N as a new tab-separated column, and writes
	new_file + '<N>'.  Review metadata comes from self.fu, a FileUtil over
	the raw review dump.  NOTE(review): the chaining only works when
	old_file and new_file are the same stem — confirm with callers.
	"""

	def __init__(self, old_file, new_file, filename=""):
		# Only build the FileUtil parser when a source dump is given.
		if filename:
			self.fu = FileUtil(filename)
			self.fu.get_structure()
		self.old_file = old_file
		self.new_file = new_file
		# self.db = Database(dbname)

	def save_reviewerid(self):
		"""Append every reviewer id from the parsed dump to old_file, one per line."""
		member_ids = self.fu.get_memberId_list()
		buf = ''.join(member_id + '\n' for member_id in member_ids)
		with open(self.old_file, 'a') as out:
			out.write(buf)

	def save_f1(self):
		"""Append feature 1 (per-review value from get_feedback_list) as a new
		tab-separated column; reads old_file, writes new_file + '1'."""
		feedback = self.fu.get_feedback_list()
		rows = []
		with open(self.old_file) as src:
			for idx, row in enumerate(src.readlines()):
				rows.append(row.replace('\n', '') + '\t' + feedback[idx] + '\n')
		with open(self.new_file + '1', 'w') as dst:
			dst.write(''.join(rows))

	def save_f2(self):
		"""Append feature 2: the per-review value from get_help_feedback_list()."""
		help_features = self.fu.get_help_feedback_list()
		review_txt = ""
		with open(self.old_file + '1') as fp:
			for index, line in enumerate(fp.readlines()):
				review_txt += line.replace('\n', '') + '\t' + help_features[index] + '\n'
		with open(self.new_file + '2', 'w') as fp:
			fp.write(review_txt)

	def save_f3(self):
		"""Append feature 3: ratio of column 2 over column 1 (helpfulness ratio),
		with an explicit 0.0 when the denominator column is zero."""
		review_txt = ""
		with open(self.old_file + '2') as fp:
			for line in fp.readlines():
				features = line.split('\t')
				# features[1]/features[2] are the columns written by f1/f2.
				if float(features[1]) == 0:
					review_txt += line.replace('\n', '') + '\t' + '0.0\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + str(float(features[2])/float(features[1])) + '\n'
		with open(self.new_file + '3', 'w') as fp:
			fp.write(review_txt)

	def save_f4(self):
		"""Append feature 4: number of space-separated words in the review title."""
		title_list = self.fu.get_title_list()
		review_txt = ""
		with open(self.old_file + '3') as fp:
			for index, line in enumerate(fp.readlines()):
				review_txt += line.replace('\n', '') + '\t' + str(len(title_list[index].split(' '))) + '\n'
		with open(self.new_file + '4', 'w') as fp:
			fp.write(review_txt)

	def save_f5(self):
		"""Append feature 5: number of space-separated words in the review body."""
		content_list = self.fu.get_content_list()
		review_txt = ""
		with open(self.old_file + '4') as fp:
			for index, line in enumerate(fp.readlines()):
				review_txt += line.replace('\n', '') + '\t' + str(len(content_list[index].split(' '))) + '\n'
		with open(self.new_file + '5', 'w') as fp:
			fp.write(review_txt)

	def save_f6(self):
		reviewer_product_date_list = self.fu.get_column_list([1,2])
		review_txt = ""
		dict = {}
		for idx, reviewer_product_date in enumerate(reviewer_product_date_list):
			product = reviewer_product_date[0]
			date = reviewer_product_date[1]
			if product not in dict:
				dict[product] = {}
			try:
				dict[product][idx] = parse(date)
			except:
				print date
		rank_list = rank_dict(dict, False)
		with open(self.old_file + '5') as fp:
			for index, line in enumerate(fp.readlines()):
				review_txt += line.replace('\n', '') + '\t' + str(rank_list[product][index] + 1) + '\n'
		with open(self.new_file + '6', 'w') as fp:
			fp.write(review_txt) 

	def save_f7(self):
		reviewer_product_date_list = self.fu.get_column_list([1,2])
		review_txt = ""
		dict = {}
		for idx, reviewer_product_date in enumerate(reviewer_product_date_list):
			product = reviewer_product_date[0]
			date = reviewer_product_date[1]
			if product not in dict:
				dict[product] = {}
			try:
				dict[product][idx] = parse(date)
			except:
				print date
		rank_list = rank_dict(dict, True)
		# with open('review_product_rank', 'w') as fp:
		# 	fp.write(str(dict))
		with open(self.old_file + '6') as fp:
			for index, line in enumerate(fp.readlines()):
				review_txt += line.replace('\n', '') + '\t' + str(rank_list[product][index] + 1) + '\n'
		with open(self.new_file + '7', 'w') as fp:
			fp.write(review_txt) 

	def save_f8(self):
		"""Append feature 8: 1 when column 6 of the row is '1', else 0.
		Column 6 is the value written by save_f6 (rank+1), so '1' presumably
		marks the product's earliest review — confirm column layout."""
		review_txt = ""
		with open(self.old_file + '7') as fp:
			for index, line in enumerate(fp.readlines()):
				if line.split('\t')[6] == '1':
					review_txt += line.replace('\n', '') + '\t' + '1\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + '0\n'
		with open(self.new_file + '8', 'w') as fp:
			fp.write(review_txt)

	def save_f9(self):
		"""Append feature 9: 1 when columns 6 AND 7 are both '1' — i.e. the
		review ranks first both oldest-first and newest-first, which presumably
		means it is the product's only review; else 0."""
		review_txt = ""
		with open(self.old_file + '8') as fp:
			for index, line in enumerate(fp.readlines()):
				if line.split('\t')[6] == '1' and line.split('\t')[7] == '1':
					review_txt += line.replace('\n', '') + '\t' + '1\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + '0\n'
		with open(self.new_file + '9', 'w') as fp:
			fp.write(review_txt)

	def save_f10(self):
		"""Append feature 10: fraction of the review's words present in the
		positive-opinion lexicon.

		BUG FIX: the lexicon lines were kept with their trailing newline, so
		the membership test `word.lower() in positive_words` could never match
		a real word; entries are now stripped.  The lexicon is also held in a
		set for O(1) lookups, and the per-hit debug print is dropped.
		"""
		review_txt = ""
		content_list = self.fu.get_content_list()
		with open('../opinion-lexicon-English/positive-words.txt') as fp:
			positive_words = set(word.strip() for word in fp)
		with open(self.old_file + '9') as fp:
			for index, line in enumerate(fp.readlines()):
				# split(' ') never yields an empty list, so content_len >= 1.
				content = content_list[index].split(' ')
				content_len = len(content)
				positive_len = 0.0
				for word in content:
					if word.lower() in positive_words:
						positive_len += 1
				review_txt += line.replace('\n', '') + '\t' + str(positive_len / content_len) +'\n'
		with open(self.new_file + '10', 'w') as fp:
			fp.write(review_txt)

	def save_f11(self):
		"""Append feature 11: fraction of the review's words present in the
		negative-opinion lexicon.

		BUG FIX: same defect as save_f10 — lexicon words carried a trailing
		newline so the membership test never matched; entries are stripped and
		stored in a set, and the debug prints are dropped.
		"""
		review_txt = ""
		content_list = self.fu.get_content_list()
		with open('../opinion-lexicon-English/negative-words.txt') as fp:
			negative_words = set(word.strip() for word in fp)
		with open(self.old_file + '10') as fp:
			for index, line in enumerate(fp.readlines()):
				# split(' ') never yields an empty list, so content_len >= 1.
				content = content_list[index].split(' ')
				content_len = len(content)
				negative_len = 0.0
				for word in content:
					if word.lower() in negative_words:
						negative_len += 1
				review_txt += line.replace('\n', '') + '\t' + str(negative_len / content_len) +'\n'
		with open(self.new_file + '11', 'w') as fp:
			fp.write(review_txt)

	def save_f12(self):
		"""Append feature 12: cosine similarity between the review content and
		the product's feature description (both via text_to_vector)."""
		review_txt = ""
		# Columns [1, -1]: product id and review content per row.
		product_content_list = self.fu.get_column_list([1,-1])
		product_feature_list = {}
		with open('../AmazonDataBackup/productInfoXML-reviewed-mProducts.features') as fp:
			for line in fp:
				product_id = line.split('\t')[0]
				product_feature = line.split('\t')[1]
				product_feature_list[product_id] = product_feature
		with open(self.old_file + '11') as fp:
			for index, line in enumerate(fp.readlines()):
				product_id = product_content_list[index][0]
				content = product_content_list[index][1].lower()
				# KeyError here would mean a product with reviews but no
				# feature entry — assumed not to happen; confirm data set.
				product_feature = product_feature_list[product_id]
				cos_sim = get_cosine(text_to_vector(content), text_to_vector(product_feature))
				review_txt += line.replace('\n', '') + '\t' + str(cos_sim) +'\n'
		with open(self.new_file + '12', 'w') as fp:
			fp.write(review_txt)

	def save_f13(self):
		"""Append feature 13: how often the product's brand name appears in the
		review, as a fraction of its word tokens (0 when anything fails)."""
		review_txt = ""
		product_content_list = self.fu.get_column_list([1,-1])
		p_b_dict = product_brand_dict('../AmazonDataBackup/productInfoXML-reviewed-mProducts.copy')

		with open(self.old_file + '12') as fp:
			for index, line in enumerate(fp.readlines()):
				product_id = product_content_list[index][0]
				content = product_content_list[index][1].lower()
				content = WORD.findall(content)
				counted_content = Counter(content)
				brand = p_b_dict[product_id]
				# NOTE(review): bare except also swallows ZeroDivisionError for
				# empty content, not just missing-brand lookups.
				try:
					brand_num = counted_content[brand]
					review_txt += line.replace('\n', '') + '\t' + str(float(brand_num) / len(content)) +'\n'
				except:
					brand_num = 0
					review_txt += line.replace('\n', '') + '\t' + '0\n'

		with open(self.new_file + '13', 'w') as fp:
			fp.write(review_txt)

	def save_f14(self):
		"""Append feature 14: ratio of digit runs to word tokens in the review
		text (0 when the text has no word tokens)."""
		content_list = self.fu.get_content_list()
		digit_re = re.compile(r'\d+')
		word_re = re.compile(r'\w+')
		rows = []
		with open(self.old_file + '13') as src:
			for idx, row in enumerate(src.readlines()):
				text = content_list[idx]
				digit_count = len(digit_re.findall(text))
				words = word_re.findall(text)
				value = str(float(digit_count) / len(words)) if words else '0'
				rows.append(row.replace('\n', '') + '\t' + value + '\n')
		with open(self.new_file + '14', 'w') as dst:
			dst.write(''.join(rows))

	def save_f15(self):
		"""Append feature 15: ratio of uppercase CHARACTERS to word tokens in
		the review text (0 when the text has no word tokens)."""
		content_list = self.fu.get_content_list()
		word_re = re.compile(r'\w+')
		rows = []
		with open(self.old_file + '14') as src:
			for idx, row in enumerate(src.readlines()):
				text = content_list[idx]
				upper_count = sum(1 for ch in text if ch.isupper())
				words = word_re.findall(text)
				value = str(float(upper_count) / len(words)) if words else '0'
				rows.append(row.replace('\n', '') + '\t' + value + '\n')
		with open(self.new_file + '15', 'w') as dst:
			dst.write(''.join(rows))

	def save_f16(self):
		"""Append feature 16: ratio of all-uppercase WORDS to word tokens.
		Unlike save_f15, `isupper()` runs on whole tokens here, so it counts
		fully-capitalised words rather than individual capital letters."""
		review_txt = ""
		content_list = self.fu.get_content_list()
		with open(self.old_file + '15') as fp:
			for index, line in enumerate(fp.readlines()):
				content = re.compile(r'\w+').findall(content_list[index])
				capital_num = sum(1 for c in content if c.isupper())
				# content = re.compile(r'\w+').findall(content)
				if len(content):
					review_txt += line.replace('\n', '') + '\t' + str(float(capital_num) / len(content)) +'\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + '0\n'

		with open(self.new_file + '16', 'w') as fp:
			fp.write(review_txt)

	def save_f17(self):
		"""Append feature 17: the review's raw star rating."""
		ratings = self.fu.get_rating_list()
		rows = []
		with open(self.old_file + '16') as src:
			for idx, row in enumerate(src.readlines()):
				rows.append(row.replace('\n', '') + '\t' + str(ratings[idx]) + '\n')
		with open(self.new_file + '17', 'w') as dst:
			dst.write(''.join(rows))

	def save_f18(self):
		"""Append feature 18: the review rating's deviation from its product's
		average rating (rating - product mean)."""
		review_txt = ""
		# Columns [1, 5]: product id and rating per row.
		product_rating_list = self.fu.get_column_list([1, 5])
		product_avg_rating_dict = product_avg_rating(product_rating_list)
		with open(self.old_file + '17') as fp:
			for index, line in enumerate(fp.readlines()):
				product_id = product_rating_list[index][0]
				rating = product_rating_list[index][1]
				review_txt += line.replace('\n', '') + '\t' + str(float(rating) - product_avg_rating_dict[product_id]) +'\n'

		with open(self.new_file + '18', 'w') as fp:
			fp.write(review_txt)

	def save_f19(self):
		"""Append feature 19: rating sentiment class — 1 for rating >= 4,
		-1 for rating <= 2.5, 0 otherwise.  Rows whose rating cannot be
		parsed are silently skipped (their index is printed), which leaves
		the output one line short — NOTE(review): verify this never fires."""
		review_txt = ""
		rating_list = self.fu.get_rating_list()
		with open(self.old_file + '18') as fp:
			for index, line in enumerate(fp.readlines()):
				try:
					rating = float(rating_list[index])
					if rating >= 4:
						review_txt += line.replace('\n', '') + '\t' + '1\n'
					elif rating <= 2.5:
						review_txt += line.replace('\n', '') + '\t' + '-1\n'
					else:
						review_txt += line.replace('\n', '') + '\t' + '0\n'
				except:
					print index

		with open(self.new_file + '19', 'w') as fp:
			fp.write(review_txt)

	def save_f20(self):
		"""Append feature 20: flags a review (1) when it is the SECOND-oldest
		review of its product (oldest-first rank == 1), it is negative
		(column 19 == -1), and the product's oldest review (rank 0) is
		positive (column 19 == 1); 0 otherwise.  Intended to catch an early
		negative review contradicting the first positive one — presumably a
		spam signal; confirm against the paper/spec this mirrors."""
		review_txt = ""
		reviewer_product_date_list = self.fu.get_column_list([1,2])
		review_txt = ""
		dict = {}
		# Parse each review's date, grouped by product, keyed by row index.
		for idx, reviewer_product_date in enumerate(reviewer_product_date_list):
			product = reviewer_product_date[0]
			date = reviewer_product_date[1]
			if product not in dict:
				dict[product] = {}
			try:
				dict[product][idx] = parse(date)
			except:
				print date
		# False => oldest review gets rank 0.
		rank_list = rank_dict(dict, False)
		with open(self.old_file + '19') as fp:
			features = fp.readlines()
			for index, line in enumerate(features):
				product_id = reviewer_product_date_list[index][0]
				rank = rank_list[product_id][index]
				# Column 19 holds the sentiment class written by save_f19.
				rating_type = int(line.split('\t')[19])
				if rank == 1 and rating_type == -1:
					# print rank_list[product_id]
					# print rank_list[product_id]
					# print index
					# Locate the product's rank-0 (oldest) review.
					first_review_index = 0
					for review in rank_list[product_id].keys():
						if rank_list[product_id][review] == 0:
							first_review_index = review
					if int(features[first_review_index].split('\t')[19]) == 1:
						print index, first_review_index
						review_txt += line.replace('\n', '') + '\t' + '1\n'
					else:
						review_txt += line.replace('\n', '') + '\t' + '0\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + '0\n'


		with open(self.new_file + '20', 'w') as fp:
			fp.write(review_txt)

	def save_f21(self):
		"""Append feature 21: mirror image of save_f20 with newest-first
		ranking (rank_dict(..., True)) — flags (1) a positive review
		(column 19 == 1) at rank 1 whose rank-0 counterpart is negative
		(column 19 == -1); 0 otherwise."""
		review_txt = ""
		reviewer_product_date_list = self.fu.get_column_list([1,2])
		review_txt = ""
		dict = {}
		# Parse each review's date, grouped by product, keyed by row index.
		for idx, reviewer_product_date in enumerate(reviewer_product_date_list):
			product = reviewer_product_date[0]
			date = reviewer_product_date[1]
			if product not in dict:
				dict[product] = {}
			try:
				dict[product][idx] = parse(date)
			except:
				print date
		# True => ranking is reversed relative to save_f20.
		rank_list = rank_dict(dict, True)
		with open(self.old_file + '20') as fp:
			features = fp.readlines()
			for index, line in enumerate(features):
				product_id = reviewer_product_date_list[index][0]
				rank = rank_list[product_id][index]
				rating_type = int(line.split('\t')[19])
				if rank == 1 and rating_type == 1:
					# print rank_list[product_id]
					# print rank_list[product_id]
					# print index
					# Locate the product's rank-0 review under this ordering.
					first_review_index = 0
					for review in rank_list[product_id].keys():
						if rank_list[product_id][review] == 0:
							first_review_index = review
					if int(features[first_review_index].split('\t')[19]) == -1:
						print index, first_review_index
						review_txt += line.replace('\n', '') + '\t' + '1\n'
					else:
						review_txt += line.replace('\n', '') + '\t' + '0\n'
				else:
					review_txt += line.replace('\n', '') + '\t' + '0\n'


		with open(self.new_file + '21', 'w') as fp:
			fp.write(review_txt)

	def save_f22(self):
		"""Append feature 22: per reviewer, the fraction of their reviews whose
		column-8 flag (feature 8, earliest-review flag) is 1."""
		review_txt = ""

		with open(self.old_file + '21') as fp:
			reviewers = {}
			lines = fp.readlines()
			# First pass: tally total reviews and flagged reviews per reviewer
			# (column 0 is the reviewer id).
			for line in lines:
				features = line.split('\t')
				if not reviewers.has_key(features[0]):
					reviewers[features[0]] = {'review_num': 0, 'first_review_num': 0.0}
				if int(features[8]) == 1:
					reviewers[features[0]]['first_review_num'] += 1
				reviewers[features[0]]['review_num'] += 1
			# features = [line.split('\t') for line in lines]
			print 'finish features'
			# Second pass: emit the ratio for each row.
			for index, line in enumerate(lines):
				features = line.split('\t')
				review_id = features[0]
				review_num = reviewers[review_id]['review_num']
				first_review_num = reviewers[review_id]['first_review_num']
				if first_review_num > 0 and first_review_num != review_num:
					print index, first_review_num
				review_txt += lines[index].replace('\n', '') + '\t' + str(first_review_num / review_num) +'\n'


		with open(self.new_file + '22', 'w') as fp:
			fp.write(review_txt)

	def save_f23(self):
		"""Append feature 23: per reviewer, the fraction of their reviews whose
		column-9 flag (feature 9, only-review flag) is 1."""
		review_txt = ""

		with open(self.old_file + '22') as fp:
			reviewers = {}
			lines = fp.readlines()
			# First pass: tally totals and flagged counts per reviewer.
			for line in lines:
				features = line.split('\t')
				if not reviewers.has_key(features[0]):
					reviewers[features[0]] = {'review_num': 0, 'only_review_num': 0.0}
				if int(features[9]) == 1:
					reviewers[features[0]]['only_review_num'] += 1
				reviewers[features[0]]['review_num'] += 1
			# features = [line.split('\t') for line in lines]
			print 'finish features'
			# Second pass: emit the ratio for each row.
			for index, line in enumerate(lines):
				features = line.split('\t')
				review_id = features[0]
				review_num = reviewers[review_id]['review_num']
				only_review_num = reviewers[review_id]['only_review_num']
				if only_review_num > 0 and only_review_num != review_num:
					print index, only_review_num
				review_txt += lines[index].replace('\n', '') + '\t' + str(only_review_num / review_num) +'\n'


		with open(self.new_file + '23', 'w') as fp:
			fp.write(review_txt)

	def save_f24(self):
		"""Append feature 24: the reviewer's average rating across all of
		their reviews."""
		review_txt = ""
		# Columns [0, 5]: reviewer id and rating per row.
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = reviewer_rating[1]
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'ratings': [], 'avg_rating': 0.0}
			reviewers[reviewer_id]['ratings'].append(float(rating))

		for reviewer_id in reviewers.keys():
			ratings = reviewers[reviewer_id]['ratings']
			reviewers[reviewer_id]['avg_rating'] = sum(ratings) / len(ratings)

		with open(self.old_file + '23') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				review_txt += lines[index].replace('\n', '') + '\t' + str(reviewers[reviewer_id]['avg_rating']) +'\n'


		with open(self.new_file + '24', 'w') as fp:
			fp.write(review_txt)

	def save_f25(self):
		"""Append feature 25: rating dispersion per reviewer.

		NOTE(review): this is sqrt(sum of squared deviations) WITHOUT dividing
		by N, so it is not a true standard deviation — it grows with the
		reviewer's review count.  Confirm whether that is intended.
		"""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = reviewer_rating[1]
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'ratings': [], 'avg_rating': 0.0, 'std_rating': 0.0}
			reviewers[reviewer_id]['ratings'].append(float(rating))

		for reviewer_id in reviewers.keys():
			ratings = reviewers[reviewer_id]['ratings']
			reviewers[reviewer_id]['avg_rating'] = sum(ratings) / len(ratings)


		for reviewer_id in reviewers.keys():
			ratings = reviewers[reviewer_id]['ratings']
			avg_rating = reviewers[reviewer_id]['avg_rating']
			std_rating = math.sqrt(sum([ (rating - avg_rating)**2 for rating in ratings ]))
			reviewers[reviewer_id]['std_rating'] = std_rating

		with open(self.old_file + '24') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				review_txt += lines[index].replace('\n', '') + '\t' + str(reviewers[reviewer_id]['std_rating']) +'\n'


		with open(self.new_file + '25', 'w') as fp:
			fp.write(review_txt)

	def save_f26(self):
		"""Append feature 26: 1 when ALL of the reviewer's ratings fall in the
		same sentiment band (good/avg/bad), else 0.

		NOTE(review): the bad threshold here is `< 2.5` while save_f19 uses
		`<= 2.5` — a rating of exactly 2.5 is classified differently.  Confirm
		which is intended.
		"""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = float(reviewer_rating[1])
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = []
			rating_flag = 1
			if rating >= 4:
				rating_flag = 1
			elif rating < 2.5:
				rating_flag = -1
			else:
				rating_flag = 0
			reviewers[reviewer_id].append(rating_flag)

		with open(self.old_file + '25') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				if all_same(reviewers[reviewer_id]):
					review_txt += lines[index].replace('\n', '') + '\t' +'1\n'
				else:
					review_txt += lines[index].replace('\n', '') + '\t' +'0\n'


		with open(self.new_file + '26', 'w') as fp:
			fp.write(review_txt)

	def save_f27(self):
		"""Append feature 27: 1 when the reviewer has given both good (>=4)
		and bad (<2.5) ratings but never an average one; else 0."""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		# Record which sentiment bands each reviewer has ever used.
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = float(reviewer_rating[1])
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'good': False, 'avg': False, 'bad': False}
			if rating >= 4:
				reviewers[reviewer_id]['good'] = True
			elif rating < 2.5:
				reviewers[reviewer_id]['bad'] = True
			else:
				reviewers[reviewer_id]['avg'] = True


		with open(self.old_file + '26') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				if reviewers[reviewer_id]['good'] and reviewers[reviewer_id]['bad'] and not reviewers[reviewer_id]['avg']:
					print index
					review_txt += lines[index].replace('\n', '') + '\t' +'1\n'
				else:
					review_txt += lines[index].replace('\n', '') + '\t' +'0\n'


		with open(self.new_file + '27', 'w') as fp:
			fp.write(review_txt)

	def save_f28(self):
		"""Append feature 28: 1 when the reviewer has given both good (>=4)
		and average ratings but never a bad (<2.5) one; else 0."""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		# Record which sentiment bands each reviewer has ever used.
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = float(reviewer_rating[1])
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'good': False, 'avg': False, 'bad': False}
			if rating >= 4:
				reviewers[reviewer_id]['good'] = True
			elif rating < 2.5:
				reviewers[reviewer_id]['bad'] = True
			else:
				reviewers[reviewer_id]['avg'] = True


		with open(self.old_file + '27') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				if reviewers[reviewer_id]['good'] and reviewers[reviewer_id]['avg'] and not reviewers[reviewer_id]['bad']:
					print index
					review_txt += lines[index].replace('\n', '') + '\t' +'1\n'
				else:
					review_txt += lines[index].replace('\n', '') + '\t' +'0\n'


		with open(self.new_file + '28', 'w') as fp:
			fp.write(review_txt)

	def save_f29(self):
		"""Append feature 29: 1 when the reviewer has given both bad (<2.5)
		and average ratings but never a good (>=4) one; else 0."""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		# Record which sentiment bands each reviewer has ever used.
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = float(reviewer_rating[1])
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'good': False, 'avg': False, 'bad': False}
			if rating >= 4:
				reviewers[reviewer_id]['good'] = True
			elif rating < 2.5:
				reviewers[reviewer_id]['bad'] = True
			else:
				reviewers[reviewer_id]['avg'] = True


		with open(self.old_file + '28') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				if reviewers[reviewer_id]['bad'] and reviewers[reviewer_id]['avg'] and not reviewers[reviewer_id]['good']:
					print index
					review_txt += lines[index].replace('\n', '') + '\t' +'1\n'
				else:
					review_txt += lines[index].replace('\n', '') + '\t' +'0\n'


		with open(self.new_file + '29', 'w') as fp:
			fp.write(review_txt)

	def save_f30(self):
		"""Append feature 30: 1 when the reviewer has used all three sentiment
		bands (good, average, and bad); else 0."""
		review_txt = ""
		reviewer_rating_list = self.fu.get_column_list([0,5])
		reviewers = {}
		# Record which sentiment bands each reviewer has ever used.
		for reviewer_rating in reviewer_rating_list:
			reviewer_id = reviewer_rating[0]
			rating = float(reviewer_rating[1])
			if not reviewers.has_key(reviewer_id):
				reviewers[reviewer_id] = {'good': False, 'avg': False, 'bad': False}
			if rating >= 4:
				reviewers[reviewer_id]['good'] = True
			elif rating < 2.5:
				reviewers[reviewer_id]['bad'] = True
			else:
				reviewers[reviewer_id]['avg'] = True


		with open(self.old_file + '29') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				if reviewers[reviewer_id]['bad'] and reviewers[reviewer_id]['avg'] and reviewers[reviewer_id]['good']:
					print index
					review_txt += lines[index].replace('\n', '') + '\t' +'1\n'
				else:
					review_txt += lines[index].replace('\n', '') + '\t' +'0\n'


		with open(self.new_file + '30', 'w') as fp:
			fp.write(review_txt)

	def save_f31(self):
		"""Append feature 31: per reviewer, the fraction of their reviews whose
		column-20 flag (feature 20) is set."""
		review_txt = ""
		reviewers = {}
		# First pass over the file: per-reviewer totals and flagged counts.
		with open(self.old_file + '30') as fp:
			for features in fp.readlines():
				features = features.split('\t')
				reviewer_id = features[0]
				if not reviewers.has_key(reviewer_id):
					reviewers[reviewer_id] = {'total': 0.0, 'first': 0.0}
				reviewers[reviewer_id]['total'] += 1
				if int(features[20]):
					reviewers[reviewer_id]['first'] += 1


		# Second pass: emit the ratio for each row.
		with open(self.old_file + '30') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				review_txt += lines[index].replace('\n', '') + '\t' + str(reviewers[reviewer_id]['first'] / reviewers[reviewer_id]['total']) +'\n'
				if reviewers[reviewer_id]['first'] and reviewers[reviewer_id]['first'] != 1:
					print index

		with open(self.new_file + '31', 'w') as fp:
			fp.write(review_txt)

	def save_f32(self):
		"""Append feature 32: per reviewer, the fraction of their reviews whose
		column-21 flag (feature 21) is set."""
		review_txt = ""
		reviewers = {}
		# First pass over the file: per-reviewer totals and flagged counts.
		with open(self.old_file + '31') as fp:
			for features in fp.readlines():
				features = features.split('\t')
				reviewer_id = features[0]
				if not reviewers.has_key(reviewer_id):
					reviewers[reviewer_id] = {'total': 0.0, 'first': 0.0}
				reviewers[reviewer_id]['total'] += 1
				if int(features[21]):
					reviewers[reviewer_id]['first'] += 1


		# Second pass: emit the ratio for each row.
		with open(self.old_file + '31') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				features = line.split('\t')
				reviewer_id = features[0]
				review_txt += lines[index].replace('\n', '') + '\t' + str(reviewers[reviewer_id]['first'] / reviewers[reviewer_id]['total']) +'\n'
				if reviewers[reviewer_id]['first'] and reviewers[reviewer_id]['first'] != 1:
					print index

		with open(self.new_file + '32', 'w') as fp:
			fp.write(review_txt)

	def save_f33(self):
		"""Append feature 33: the product's listed price (via product_price_dict)."""
		price_by_product = product_price_dict('../AmazonDataBackup/productInfoXML-reviewed-mProducts.copy')
		product_ids = self.fu.get_productId_list()
		rows = []
		with open(self.old_file + '32') as src:
			for idx, row in enumerate(src.readlines()):
				rows.append(row.replace('\n', '') + '\t' + str(price_by_product[product_ids[idx]]) + '\n')
		with open(self.new_file + '33', 'w') as dst:
			dst.write(''.join(rows))

	def save_f34(self):
		"""Append feature 34: the product's sales rank (via product_rank_dict)."""
		review_txt = ""
		product_rank = product_rank_dict('../AmazonDataBackup/productInfoXML-reviewed-mProducts.copy')
		product_list = self.fu.get_productId_list()
		with open(self.old_file + '33') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				product_id = product_list[index]
				review_txt += lines[index].replace('\n', '') + '\t' + str(product_rank[product_id]) +'\n'

		with open(self.new_file + '34', 'w') as fp:
			fp.write(review_txt)

	def save_f35(self):
		"""Append feature 35: the product's average rating across all of its
		reviews."""
		review_txt = ""
		product_list = self.fu.get_productId_list()
		# NOTE(review): product_rank is computed but never used in this method.
		product_rank = product_rank_dict('../AmazonDataBackup/productInfoXML-reviewed-mProducts.copy')
		# Columns [1, 5]: product id and rating per row.
		product_rating_list = self.fu.get_column_list([1, 5])
		products = {}
		for product_rating in product_rating_list:
			product_id = product_rating[0]
			rating = float(product_rating[1])
			if not products.has_key(product_id):
				products[product_id] = {'ratings': [], 'avg': 0.0}
			products[product_id]['ratings'].append(rating)

		for product_id in products.keys():
			products[product_id]['avg'] = sum(products[product_id]['ratings']) / len(products[product_id]['ratings'])

		with open(self.old_file + '34') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				product_id = product_list[index]
				review_txt += lines[index].replace('\n', '') + '\t' + str(products[product_id]['avg']) +'\n'

				if index == 0:
					print products[product_id]

		with open(self.new_file + '35', 'w') as fp:
			fp.write(review_txt)

	def save_f36(self):
		"""Append feature 36: rating dispersion per product.

		NOTE(review): like save_f25, this is sqrt(sum of squared deviations)
		with no division by N — not a true standard deviation.  Also,
		product_rank is computed but never used here.
		"""
		review_txt = ""
		product_list = self.fu.get_productId_list()
		product_rank = product_rank_dict('../AmazonDataBackup/productInfoXML-reviewed-mProducts.copy')
		product_rating_list = self.fu.get_column_list([1, 5])
		products = {}
		for product_rating in product_rating_list:
			product_id = product_rating[0]
			rating = float(product_rating[1])
			if not products.has_key(product_id):
				products[product_id] = {'ratings': [], 'avg': 0.0, 'std': 0.0}
			products[product_id]['ratings'].append(rating)

		for product_id in products.keys():
			products[product_id]['avg'] = sum(products[product_id]['ratings']) / len(products[product_id]['ratings'])

		for product_id in products.keys():
			avg = products[product_id]['avg']
			std = math.sqrt(sum([(rating - avg)**2 for rating in products[product_id]['ratings']]))
			products[product_id]['std'] = std

		with open(self.old_file + '35') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				product_id = product_list[index]
				review_txt += lines[index].replace('\n', '') + '\t' + str(products[product_id]['std']) +'\n'

				if index == 0:
					print products[product_id]

		with open(self.new_file + '36', 'w') as fp:
			fp.write(review_txt)

	def save_labels(self):
		review_txt = ""
		content_list = self.fu.get_content_list()
		print 'get content list'
		grams_list = []
		for content in content_list:
			grams_list.append(get_2_grams(content))
		print 'get grams list'
		label_list = []
		content_len = len(content_list)
		for x in xrange(0,content_len):
			label_list.append(0)
		print 'start labeling'
		for i in xrange(0,content_len):
			grams_a = grams_list[i]
			for j in xrange(i+1,content_len):
				grams_b = grams_list[j]
				sim = jaccard_distance(grams_a, grams_b)
				if sim >= 0.9:
					print "sim is : " , sim
					label_list[i] = 1
					label_list[j] = 1
		with open(self.old_file + '36') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				product_id = product_list[index]
				review_txt += lines[index].replace('\n', '') + '\t' + str(label_list[index]) +'\n'
				
		with open(self.new_file + '37', 'w') as fp:
			fp.write(review_txt)