def get_new_post_names(time_period):
    """Gather new post names for ``time_period`` minutes.

    Polls ``s_f.get_post_id_list`` roughly every 45 seconds and appends
    each returned name, comma-terminated, to the ``new_post_names`` file
    so a later job can consume them.

    Parameters:
        time_period: duration to keep scraping, in minutes.

    Returns:
        None.  Prints "IOError" and gives up if the output file
        cannot be opened (best-effort, matching the original intent).
    """
    out_path = "/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names"

    current = time.time()
    print(current)
    finish = current + (60 * time_period)
    print(finish)

    try:
        # ``with`` guarantees the handle is closed even if the scrape
        # call raises mid-loop.  The original caught IOError, printed,
        # and fell through -- leaving ``f`` undefined and crashing with
        # NameError on the first write.
        with open(out_path, 'a') as f:
            while finish >= time.time():
                names = s_f.get_post_id_list(1,
                                             dir='new',
                                             paramType='sort',
                                             param='new')
                # Comma-terminate each name; writelines adds no separators.
                f.writelines(name + ',' for name in names)
                time.sleep(45)
    except IOError:
        print("IOError")
def get_new_post_names(time_period):
    """Gather new post names for ``time_period`` minutes.

    NOTE(review): this is a duplicate definition of the function above
    (scraped-example artifact); at import time it shadows the earlier one.

    Appends comma-terminated post names from ``s_f.get_post_id_list``
    to the ``new_post_names`` file, polling every 45 seconds.

    Parameters:
        time_period: duration to keep scraping, in minutes.

    Returns:
        None.  Prints "IOError" and gives up if the output file
        cannot be opened.
    """
    out_path = "/Users/johndoty/Documents/workspace/Data_Aggregation/src/Scrape_Jobs/new_post_names"

    current = time.time()
    print(current)
    finish = current + (60 * time_period)
    print(finish)

    try:
        # Context manager closes the file on any exit path.  The
        # original's ``except IOError: pass`` left ``f`` unbound and
        # the subsequent writes raised NameError.
        with open(out_path, 'a') as f:
            while finish >= time.time():
                names = s_f.get_post_id_list(1, dir='new', paramType='sort', param='new')
                f.writelines(name + ',' for name in names)
                time.sleep(45)
    except IOError:
        print("IOError")
# Example #3  (extraction artifact from the scraped-examples source; commented out so the file parses)
# 0
def get_new_posts(timeSeries, updateQueue):
    """Scrape the "new" page and register unseen posts.

    Fetches the current post-ID list, keeps only IDs not already in
    ``timeSeries``, looks up their creation times, seeds a one-entry
    history for each, and pushes each onto ``updateQueue`` at priority 0.

    Parameters:
        timeSeries: dict mapping post ID -> list of stat snapshots
            (mutated in place).
        updateQueue: heapq-managed list of (priority, ID) tuples
            (mutated in place).

    Returns:
        None.  Returns early if the scrape or the detail lookup fails
        (those helpers signal failure by returning None).
    """
    IDList = s_f.get_post_id_list(1, "", "new", "sort", "new")
    if IDList is None:  # was ``== None`` -- identity test is the idiom
        return None
    # Membership test directly on the dict; ``.keys()`` was redundant.
    NewIDs = [ID for ID in IDList if ID not in timeSeries]
    if not NewIDs:
        return None  # nothing new this pass (original fell off the end)

    new_data = s_f.get_multiple_post_info(NewIDs)
    if new_data is None:
        return None
    parsed_data = s_f.parse_post_data(new_data, ['created_utc'])
    for ID in NewIDs:
        utc = parsed_data[ID]['created_utc']
        # Seed the time series with an initial snapshot for this post.
        timeSeries[ID] = [{"utc": utc, "ups": 1, "downs": 0, "score": 1}]
        # Priority 0 => update this post as soon as possible.
        heapq.heappush(updateQueue, (0, ID))