linkdict = {value + 1: links[value].strip() for value in range(num_of_links)}
reverse_linkdict = {links[value].strip(): value + 1 for value in range(num_of_links)}
# print(reverse_linkdict)

os.chdir(folder_path)  # change the working directory to the folder path

# Create an instance of Crawler and pass the user number and the links file into it
crawler = Crawler(user, "res.txt", reverse_linkdict, linkdict)
# Call crawl_and_createfile to fetch all target links and create a file for each source link
crawler.crawl_and_createfile()

fileprocess = FileProcessor(folder_path, user, num_of_links)
fileprocess.file_filling()
fileprocess.index_value()
# fileprocess.index2pair()
fileprocess.rename()
fileprocess.create_pair_files('pair_dir')  # only needed if the files must be shuffled and reduced
fileprocess.max_len = fileprocess.find_largest()
fileprocess.write_bin_files()

if remapping:
    file_transfer = FileTransfer(user, folder_path, path)
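A minimal, self-contained sketch of the two dictionaries built above, using hypothetical placeholder URLs (not project data) to show that linkdict maps 1-based indices to stripped links while reverse_linkdict inverts that mapping:

# Standalone sketch of the index <-> link mappings; the sample links
# below are made-up placeholders, not data from this project.
links = ["https://example.com/a\n", "https://example.com/b\n"]
num_of_links = len(links)

linkdict = {value + 1: links[value].strip() for value in range(num_of_links)}
reverse_linkdict = {links[value].strip(): value + 1 for value in range(num_of_links)}

assert linkdict[1] == "https://example.com/a"          # 1-based index -> stripped link
assert reverse_linkdict["https://example.com/b"] == 2  # stripped link -> 1-based index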
linkdict = {value + 1: links[value].strip() for value in range(num_of_links)}
uci_linkdict = linkdict
reverse_linkdict = {links[value].strip(): value + 1 for value in range(num_of_links)}

if recrawl:
    # print(len(reverse_linkdict.keys()))
    os.chdir(folder_path)  # change the working directory to the folder path
    print("Start crawling")
    # Create an instance of Crawler and pass the user number and the links file into it
    crawler = Crawler(user, "res.txt", reverse_linkdict, linkdict)
    # Call crawl_and_createfile to fetch all target links and create a file for each source link
    crawler.crawl_and_createfile(False, False)

if reprocess:
    if not reinit:
        with open(dir + "/res.txt", "r") as f:
            num_of_links = len(f.readlines())
    fileprocess = FileProcessor(folder_path, user, num_of_links, path)
    fileprocess.file_filling()
    fileprocess.index_value()
    fileprocess.rename()

if remapping:
    if mode == 1:
        file_transfer = FileTransfer(user, folder_path, path, num_of_links)
        file_coded_transfer = FileCodedTransfer(user, folder_path, path,