Code example #1
import s_f  # project helper module (not shown in this listing)


def num_post_test1(num_posts, filename):
    """
    Try to fetch num_posts posts in a single /by_id/ query and verify
    that Reddit returns exactly the IDs that were requested.
    """
    id_list = s_f.get_id_list_from_file(filename)
    page = "http://www.reddit.com/by_id/"
    suf = ".json"
    # Drop empty entries that the comma-separated file format can
    # produce, then keep only the first num_posts IDs.
    id_list = [post_id for post_id in id_list if post_id][:num_posts]

    id_string = ",".join(id_list)
    print id_string

    url = page + id_string + suf
    print url

    data = s_f.get_JSON_object_from_page(url, data=None, sleep=0)

    # Compare the IDs Reddit sent back against the IDs we asked for.
    new_list = s_f.get_ids(data)
    print id_list
    print new_list

    old_set = set(id_list)
    new_set = set(new_list)
    print "SIZE OF OLD: %d" % len(old_set)
    print "SIZE OF NEW: %d" % len(new_set)
    if old_set == new_set:
        print "THE SETS ARE EQUAL"
    else:
        print "THE SETS ARE UNEQUAL"
Code example #2
        # Fragment: the enclosing def and loop header are cut off above.
        # Each name gets a trailing comma, matching the comma-separated
        # format that num_names() parses below; the sleep throttles the
        # scraper between batches.
        names = [name + ',' for name in names]
        f.writelines(names)
        time.sleep(45)
    f.close()
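Since the top of that function is missing, here is one hedged reconstruction, assuming it is the scrape job that produced the new_post_names file; record_new_post_names, num_batches, and s_f.get_new_post_names are all hypothetical names, not taken from the original:

import time

import s_f  # project helper module (not shown in this listing)


def record_new_post_names(filename, num_batches):
    # Hypothetical reconstruction of the truncated function above.
    f = open(filename, 'a')
    for _ in range(num_batches):
        names = s_f.get_new_post_names()  # hypothetical fetch of fresh fullnames
        names = [name + ',' for name in names]
        f.writelines(names)
        time.sleep(45)  # throttle between batches
    f.close()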


def num_names():
    """Report total and unique entry counts in the saved name file."""
    try:
        f = open(
            "/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names",
            'r')
    except IOError:
        # Bail out instead of falling through to an undefined f.
        print "IOError"
        return
    contents = f.read()  # renamed from str to avoid shadowing the builtin
    f.close()
    entries = contents.split(',')
    print "Num total entries is: %d" % len(entries)
    print "Num unique entries is: %d" % len(remove_duplicates(entries))


if __name__ == '__main__':
    filename = "/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names"
    num_names()
    # Sanity check: remove_duplicates should drop the repeated 4 and
    # leave the original list untouched.
    l = [1, 2, 3, 4, 4, 5, 6]
    print l
    l2 = remove_duplicates(l)
    print l
    print l2
    print len(s_f.get_id_list_from_file(filename))
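remove_duplicates is called throughout but never defined in the listing. Judging from the sanity check in __main__ (the input keeps its order and only the repeated 4 disappears from the copy), an order-preserving version along these lines would fit; this is a sketch, not the original implementation:

def remove_duplicates(seq):
    # Keep the first occurrence of each entry, preserve order, and
    # return a new list so the input is left untouched.
    seen = set()
    result = []
    for item in seq:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result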