import time

# Hypothetical import name: the module is referenced only as s_f in this file.
import scrape_functions as s_f


def num_post_test1(num_posts, filename):
    """Use s_f.get_id_list_from_file, then try to fetch num_posts in a single query."""
    id_list = s_f.get_id_list_from_file(filename)
    page = "http://www.reddit.com/by_id/"
    suf = ".json"
    id_list = id_list[:num_posts]
    # The id file is comma-delimited with a leading delimiter, so id_list[0]
    # is an empty string; the join below therefore produces a leading comma,
    # which is stripped off.
    id_string = ",".join(id_list)
    id_string = id_string[1:]
    print id_string
    url = page + id_string + suf
    print url
    data = s_f.get_JSON_object_from_page(url, data=None, sleep=0)
    new_list = s_f.get_ids(data)
    id_list = id_list[1:]  # drop the empty first entry before comparing
    print id_list
    print new_list
    old_set = set(id_list)
    new_set = set(new_list)
    print "SIZE OF OLD: %d" % len(old_set)
    print "SIZE OF NEW: %d" % len(new_set)
    if old_set == new_set:
        print "THE SETS ARE EQUAL"
    else:
        print "THE SETS ARE UNEQUAL"
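# A minimal sketch of the s_f helpers called above, inferred from their call
# sites; the real module is not part of this file, so treat these as assumed
# implementations rather than the project's actual code.
import json
import urllib2


def get_id_list_from_file(filename):
    # Assumed file format: comma-delimited reddit fullnames (e.g. "t3_abc").
    with open(filename, 'r') as f:
        return f.read().split(',')


def get_JSON_object_from_page(url, data=None, sleep=0):
    # Assumed behavior: optionally wait between requests, then fetch and
    # parse the page as JSON.
    time.sleep(sleep)
    return json.load(urllib2.urlopen(url, data))


def get_ids(data):
    # Assumed behavior: pull each post's fullname out of a reddit listing.
    return [child['data']['name'] for child in data['data']['children']]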
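# remove_duplicates is called below but defined elsewhere in the project; a
# minimal sketch, assuming an order-preserving dedupe that returns a new list
# and leaves its argument untouched (the __main__ block prints l before and
# after to verify exactly that).
def remove_duplicates(items):
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result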
# The def line for the function below is missing from this file; the
# signature is a hypothetical reconstruction inferred from the body, which
# appends comma-delimited names to an open file and then pauses between
# scrape requests.
def write_names(f, names):
    names = [name + ',' for name in names]
    f.writelines(names)
    time.sleep(45)
    f.close()


def num_names():
    try:
        f = open("/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names", 'r')
    except IOError:
        # Return instead of falling through: without this, a failed open
        # leaves f undefined and the read below raises a NameError.
        print "IOError"
        return
    contents = f.read()  # renamed from 'str', which shadowed the builtin
    f.close()
    entries = contents.split(',')
    print "Num total entries is: %d" % len(entries)
    print "Num unique entries is: %d" % len(remove_duplicates(entries))


if __name__ == '__main__':
    filename = "/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names"
    num_names()
    # Sanity-check remove_duplicates: it should return a deduped copy and
    # leave the original list unchanged.
    l = [1, 2, 3, 4, 4, 5, 6]
    print l
    l2 = remove_duplicates(l)
    print l
    print l2
    print len(s_f.get_id_list_from_file(filename))