def get_new_post_names(time_period):
    """Gather new post names for *time_period* minutes and append them,
    comma-separated, to a fixed output file for later processing.

    Polls s_f.get_post_id_list roughly every 45 seconds until the time
    window elapses.

    Args:
        time_period: duration of the scrape window, in minutes.

    Raises:
        IOError: if the output file cannot be opened. (The original
        printed "IOError" and fell through, then crashed with NameError
        on the undefined handle `f`; propagating is the honest fix.)
    """
    out_path = ("/Users/johndoty/Documents/workspace/Data_Aggregation"
                "/src/Scrape_Jobs/new_post_names")
    current = time.time()
    print(current)
    finish = current + (60 * time_period)
    print(finish)
    # Context manager guarantees the file is closed even if a scrape
    # iteration raises (the original leaked the handle on error).
    with open(out_path, 'a') as f:
        while finish >= time.time():
            names = s_f.get_post_id_list(1, dir='new', paramType='sort',
                                         param='new')
            # Scrape helpers can return None on failure (see the guard in
            # get_new_posts); skip the batch instead of raising TypeError.
            if names is not None:
                f.writelines(name + ',' for name in names)
            time.sleep(45)
def get_new_post_names(time_period):
    """Gather new post names for *time_period* minutes and append them,
    comma-separated, to a fixed output file for later processing.

    NOTE(review): this is a byte-for-byte duplicate definition of the
    function above; the later definition wins at import time. Consider
    deleting one copy.

    Args:
        time_period: duration of the scrape window, in minutes.

    Raises:
        IOError: if the output file cannot be opened. (The original
        printed "IOError", fell through, and then hit NameError on the
        undefined handle `f`; propagating is the honest fix.)
    """
    out_path = ("/Users/johndoty/Documents/workspace/Data_Aggregation"
                "/src/Scrape_Jobs/new_post_names")
    current = time.time()
    print(current)
    finish = current + (60 * time_period)
    print(finish)
    # `with` closes the file even when an iteration raises; the original
    # only closed it on the success path.
    with open(out_path, 'a') as f:
        while finish >= time.time():
            names = s_f.get_post_id_list(1, dir='new', paramType='sort',
                                         param='new')
            # Guard against a failed scrape returning None, matching the
            # defensive style used in get_new_posts.
            if names is not None:
                f.writelines(name + ',' for name in names)
            time.sleep(45)
def get_new_posts(timeSeries, updateQueue):
    """Scrape the "new" page and register posts not yet being tracked.

    Post ids are checked against *timeSeries*; unseen ids get an initial
    time-series entry and are pushed onto *updateQueue* with priority 0.

    Args:
        timeSeries: dict mapping post id -> list of snapshot dicts.
            Mutated in place with an initial snapshot for each new post.
        updateQueue: heapq-managed list of (priority, id) tuples.
            Mutated in place via heapq.heappush.

    Returns:
        None always; an early None return signals a failed scrape.
    """
    id_list = s_f.get_post_id_list(1, "", "new", "sort", "new")
    # `is None`, not `== None`: identity test for the failure sentinel.
    if id_list is None:
        return None
    # Membership test goes straight against the dict — `.keys()` built a
    # throwaway list per lookup under Python 2.
    new_ids = [post_id for post_id in id_list if post_id not in timeSeries]
    if not new_ids:
        return None
    new_data = s_f.get_multiple_post_info(new_ids)
    if new_data is None:
        return None
    parsed_data = s_f.parse_post_data(new_data, ['created_utc'])
    for post_id in new_ids:
        utc = parsed_data[post_id]['created_utc']
        # New posts start at 1 up / 0 down, score 1 (Reddit's self-vote).
        timeSeries[post_id] = [{"utc": utc, "ups": 1, "downs": 0, "score": 1}]
        # Priority 0 = update as soon as possible.
        heapq.heappush(updateQueue, (0, post_id))