def getEntriesFromUrl(url, delay=10):
    """Fetch a topic page and parse every entry it contains.

    Args:
        url: Topic page URL to download.
        delay: Seconds passed through to ``helper.urlopen`` as a politeness
            delay between requests.

    Returns:
        list[Entry]: Parsed entries; an empty list if the page could not be
        opened.
    """
    try:
        page = helper.urlopen(url, delay=delay)
    except Exception:
        # Best-effort fetch: log (lazily formatted) and skip this page
        # instead of crashing the whole scrape.
        logging.error("Failed to open url: %s", url)
        return []

    html = page.read()
    soup = BeautifulSoup(html, "html.parser")

    # The topic id lives on the <h1 id="title"> element; look it up once.
    baslik_id = soup.find('h1', id='title').attrs['data-id']

    result = []
    for item in soup.find('ul', id='entry-list').find_all('li'):
        id_ = str(item.attrs['data-id'])
        text = textWithNewlines(item.find('div', class_='content')).strip()
        author = str(item.attrs['data-author'])
        favoriteCount = int(item.attrs['data-favorite-count'])

        dateText = item.find('a', class_='entry-date').text
        try:
            # The date text may contain several datetimes (e.g. edited
            # entries); the first one is the creation time.
            timestamp = helper.datetimeToTimestamp(
                getDatetimesFromEntryDate(dateText)[0])
        except Exception:
            # Unparseable date formats are tolerated; the entry is kept
            # without a timestamp.
            timestamp = None

        result.append(Entry(text=text,
                            author=author,
                            timestamp=timestamp,
                            baslik_id=baslik_id,
                            favoriteCount=favoriteCount,
                            id_=id_))
    return result
def setTimestamp(self, dt):
    """Store *dt* on this object as a numeric timestamp.

    The datetime is converted via ``helper.datetimeToTimestamp`` before
    being assigned to ``self.timestamp``.
    """
    converted = helper.datetimeToTimestamp(dt)
    self.timestamp = converted