def get_non_retrieved(info_label):
    """
    Record which user files hold no data under ``info_label``.

    The list of user file paths is loaded from ``_output_path + "index.txt"``
    and scanned in order, stopping after the entry whose position equals the
    index returned by ``pu.get_last_status``. A file counts as retrieved when
    a line containing ``info_label`` is followed by at least one more line;
    every other scanned file (label missing, or label on the very last line)
    is collected and written to ``_output_path + "non_retrieved.txt"``.

    #@Argument:
    #info_label = string looked for (as a substring) in each line of a user file
    #Return: None, it prints the loop counter and the amount of non-retrieved
    #files, then writes the non-retrieved paths on file
    """
    paths = file_loader.loadList(_output_path + "index.txt")
    current_user_index = pu.get_last_status(paths, log_path + "status.txt")

    # A manual counter (instead of enumerate) keeps the printed value the same
    # as before: the index at which the scan stopped, or len(paths) when the
    # loop runs to completion.
    i = 0
    non_retrieved_list = []
    for path in paths:
        label_found = False
        has_data = False
        # Entries carry a trailing newline and a leading "/"; the leading
        # slash is dropped so the file is opened relative to the working
        # directory. The context manager closes every handle, which the
        # previous bare open() inside the for statement never did.
        with open(path.lstrip("/").rstrip("\n")) as user_file:
            for line in user_file:
                # First line right after the label, therefore there is data
                # (at least one!)
                if label_found:
                    has_data = True
                    break
                if info_label in line:
                    label_found = True

        if not has_data:
            non_retrieved_list.append(path.rstrip("\n"))

        # Do not scan beyond the last user reported by the status file.
        if i == current_user_index:
            break
        i = i + 1

    # NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    # source; the summary print and the file write are assumed to run once,
    # after the loop -- confirm.
    # Single-argument print form produces the same output as `print a, b`.
    print("%s %s" % (i, len(non_retrieved_list)))
    file_loader.listToTXT(non_retrieved_list, _output_path + "non_retrieved.txt")
def get_all_lists(query, keyIndex = 3):
    """
    Collect the ``query`` list of every indexed user and store it on file.

    Processing starts at the index returned by ``pu.get_last_status`` and
    walks the paths listed in ``_output_path + 'index.txt'`` to the end. For
    each user file the screen name is read, the list is fetched with
    ``get_list``, saved with ``store_listed_info`` and a progress line is
    printed.

    #@Arguments
    #query = string to decide which kind of list must be retrieved, i.e.:
    #        followers, friends, and etc..
    #keyIndex = An integer for screen_name, which we assume by default that
    #           is at position 3
    #Return: None, it gets all values of given query and writes it on file
    #It is expected to have an index file with all users file_path
    #The process exits (sys.exit) when the index file cannot be loaded
    """
    paths = file_loader.loadList(_output_path + 'index.txt')
    # loadList is compared against False exactly as before; an empty list is
    # not treated as a load failure here.
    if paths == False:
        log.warning("No paths file to load")
        print("No paths file to load")
        sys.exit()

    current_user_index = pu.get_last_status(paths, log_path + "status.txt")
    last_user_index = len(paths)

    # With an empty index, or a resume position at/after the end, the
    # "Starting at" line below used to fail with IndexError although there is
    # simply nothing to do.
    if current_user_index >= last_user_index:
        print("No users left to process")
        return

    # Two-item print reproduced as a single formatted string (same output).
    print("%s %s" % ('Starting at: ', paths[current_user_index].rstrip("\n")))

    percentage = get_percentage(_output_path + 'all-users.csv', query + "_count", current_user_index)
    collected_key = "collected_" + query + "_count"
    total_key = "total_" + query + "_count"

    while current_user_index < last_user_index:
        user_path = paths[current_user_index]

        # Loading from the account txt file which will create a dictionary at
        # this point.
        user = pu.sort_dict(file_loader.loadCSV(user_path, keyIndex))
        # list(...) keeps the first-key lookup working when keys() returns a
        # view instead of a list; an empty dict still raises IndexError.
        screen_name = list(user.keys())[0]

        query_list = get_list(screen_name, query)
        percentage[collected_key] += len(query_list)
        store_listed_info(query, query_list, screen_name, user_path)

        # A total of zero would raise ZeroDivisionError; report 0.0 instead.
        total = percentage[total_key]
        if total:
            completed = round(100 * float(percentage[collected_key]) / total, 2)
        else:
            completed = 0.0
        print("%s %s %s" % ("Status:", completed, "completed"))

        current_user_index += 1