コード例 #1
0
ファイル: xiaoran.py プロジェクト: tianrenz2/Coded-MapReduce
            # Forward lookup: 1-based position -> whitespace-stripped entry of
            # `links`, for the first `num_of_links` entries.
            linkdict = {
                value + 1: links[value].strip()
                for value in range(num_of_links)
            }
            # Inverse lookup: stripped entry -> 1-based position. If two entries
            # strip to the same text, the later position overwrites the earlier.
            reverse_linkdict = {
                links[value].strip(): value + 1
                for value in range(num_of_links)
            }

        # print(reverse_linkdict)
        # Switch the process working directory to folder_path before the
        # crawl and file-processing steps below run.
        os.chdir(folder_path)

        # Build a Crawler from `user`, the "res.txt" file name and both lookup
        # dictionaries (reverse_linkdict first, then linkdict).
        crawler = Crawler(user, "res.txt", reverse_linkdict, linkdict)
        # Run the crawl step.
        # NOTE(review): the original comment says this gathers all target links
        # and creates one file per source link -- confirm against
        # Crawler.crawl_and_createfile.
        crawler.crawl_and_createfile()

        # Post-processing via FileProcessor; the calls run in this fixed order.
        fileprocess = FileProcessor(folder_path, user, num_of_links)
        fileprocess.file_filling()
        fileprocess.index_value()
        # fileprocess.index2pair()  (call currently disabled)
        fileprocess.rename()
        fileprocess.create_pair_files('pair_dir')

        # max_len is assigned from find_largest() before write_bin_files() is
        # called.
        # NOTE(review): the original comment ties these two calls to the
        # shuffle/reduce files -- confirm whether they are optional.
        fileprocess.max_len = fileprocess.find_largest()
        fileprocess.write_bin_files()

    if remapping:
        file_transfer = FileTransfer(users, folder_path, path)
コード例 #2
0
            for value in range(num_of_links)
        }
        uci_linkdict = linkdict
        reverse_linkdict = {
            links[value].strip(): value + 1
            for value in range(num_of_links)
        }

    if recrawl:
        # Crawl step, run only when `recrawl` is truthy. The working directory
        # is switched to folder_path first.
        os.chdir(folder_path)
        print("Start crawling")
        # Build a Crawler from `user`, the "res.txt" file name and both lookup
        # dictionaries (reverse_linkdict first, then linkdict).
        crawler = Crawler(user, "res.txt", reverse_linkdict, linkdict)
        # Run the crawl with both positional flags set to False.
        # NOTE(review): the meaning of the two flags is not visible here --
        # check the signature of Crawler.crawl_and_createfile.
        crawler.crawl_and_createfile(False, False)

    if reprocess:
        # File-processing step, run only when `reprocess` is truthy.
        if not reinit:
            # When `reinit` is falsy, num_of_links is taken from the number of
            # lines in res.txt.
            # NOTE(review): presumably the reinit path is what normally sets
            # num_of_links -- confirm.
            # NOTE(review): `dir` is concatenated with a string, so it must be
            # bound to a path string elsewhere; the name shadows the builtin
            # dir().
            with open(dir + "/res.txt", "r") as f:
                num_of_links = len(f.readlines())

        # Run the FileProcessor steps in this fixed order.
        fileprocess = FileProcessor(folder_path, user, num_of_links, path)
        fileprocess.file_filling()
        fileprocess.index_value()
        fileprocess.rename()

    if remapping:
        if mode == 1:
            file_transfer = FileTransfer(user, folder_path, path, num_of_links)
            file_coded_transfer = FileCodedTransfer(user, folder_path, path,