def getLinks(self):
    """Collect PrNewswire headlines that mention a tracked NASDAQ symbol.

    Downloads the page at ``self.URL1``, walks every ``ul.list-links``
    entry and keeps those whose text contains a ``(NASDAQ: XXXX)`` style
    tag for a symbol listed in ``reader.requestArray``.  Entries whose
    text contains "to Present at" are skipped.

    An entry is accepted when the markup of its date block contains no
    comma, or when it contains ``self.today``.
    NOTE(review): presumably comma-less dates are time-only stamps for
    the current day -- confirm against the live page.

    Date blocks are paired with link entries purely by position.

    Returns:
        list: ``Link`` objects with ``symbol``, ``url``, ``text``,
        ``linkText``, ``date`` and ``source`` ("PrNewswire") filled in.
    """
    response = requests.get(self.URL1)
    soup = BeautifulSoup(response.content)
    linksArray = []

    links = soup.findAll("ul", {"class": "list-links"})
    dates = soup.findAll("div", {"class": "col-sm-3 col-md-2"})

    # Detach the button groups from the tree so that their text does not
    # show up in the getText() calls below.
    for button in soup.findAll("div", {"class": "btn-group"}):
        button.extract()

    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    nasdaq_pattern = re.compile(
        r"[(]\s?nasdaq(:| :|: | :|)\s?(?P<symbol>[a-z][a-z][a-z][a-z]?)\s?.*?[)]"
    )

    for index, link in enumerate(links):
        link_text = link.getText()
        match = nasdaq_pattern.search(link_text.lower())
        if not match:
            continue
        # NOTE(review): this check is case-sensitive; confirm that is intended.
        if "to Present at" in link_text:
            continue

        wanted = match.group("symbol")
        anchors = link.findAll('a', href=True)
        list_items = link.findAll('li')

        for symbol in reader.requestArray:
            if symbol[0].lower() != wanted:
                continue
            date_markup = str(dates[index])
            # Reject only dates that carry a comma and are not today's.
            if "," in date_markup and self.today not in date_markup:
                continue
            newLink = Link()
            newLink.symbol = symbol[0]
            newLink.url = anchors[0]['href']
            newLink.text = list_items[1].text
            newLink.linkText = anchors[0].text
            newLink.date = dates[index].text.strip()
            newLink.source = "PrNewswire"
            linksArray.append(newLink)

    return linksArray
def getLinks(self):
    """Collect GlobeNewswire feed items that mention a tracked NASDAQ symbol.

    Downloads ``self.url``, iterates over every ``<item>`` element and
    keeps those whose text contains a ``(NASDAQ: XXXX)`` style tag for a
    symbol listed in ``reader.requestArray``.

    Title, description and link elements are selected document-wide and
    paired with items by position.
    NOTE(review): no parser is passed to BeautifulSoup; with an HTML
    parser ``<link>`` elements may come back without text -- confirm the
    extracted urls are populated.

    Returns:
        list: ``Link`` objects with ``symbol``, ``url``, ``text``,
        ``linkText`` and ``source`` ("GlobeNewswire") filled in.  The
        ``date`` attribute is not set for this source.
    """
    # Close the session (and its pooled connections) once the feed is read.
    with requests.Session() as session:
        response = session.get(self.url)
        soup = BeautifulSoup(response.content)

    linksArray = []
    items = soup.findAll('item')

    # These selections cover the whole document, so run them once rather
    # than once per item.
    titles = soup.select('item title')
    descriptions = soup.select('item description')
    urls = soup.select('item link')

    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    nasdaq_pattern = re.compile(
        r"[(]\s?nasdaq(:| :|: | :|)\s?(?P<symbol>[a-z][a-z][a-z][a-z]?)\s?[)]"
    )

    for index, item in enumerate(items):
        match = nasdaq_pattern.search(item.getText().lower())
        if not match:
            continue
        wanted = match.group("symbol")

        for symbol in reader.requestArray:
            if symbol[0].lower() != wanted:
                continue
            newLink = Link()
            newLink.symbol = symbol[0]
            newLink.url = urls[index].text
            newLink.text = descriptions[index].text
            newLink.linkText = titles[index].text
            newLink.source = "GlobeNewswire"
            linksArray.append(newLink)

    return linksArray
def getLinks(self):
    """Collect today's BusinessWire headlines for tracked NASDAQ symbols.

    For every page number in ``self.pageArray`` the query string
    ``self.query.format(pageNumber)`` is appended to ``self.URL`` and the
    resulting page is downloaded.  Each ``ul.bw-news-list li`` entry is
    kept when all of the following hold:

    * its text contains a ``(NASDAQ: XXXX)`` style tag,
    * its text does not contain "to present at" (case-insensitive),
    * the markup of the ``<time>`` element at the same position contains
      ``self.today``,
    * the tagged symbol is listed in ``reader.requestArray``.

    ``<time>`` elements are paired with list entries purely by position.

    Returns:
        list: ``Link`` objects with ``symbol``, ``url``, ``text``,
        ``linkText``, ``date`` and ``source`` ("BusinessWire") filled in.
    """
    linksArray = []

    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    nasdaq_pattern = re.compile(
        r"[(]\s?nasdaq(:| :|: | :|)\s?(?P<symbol>[a-z][a-z][a-z][a-z]?)\s?[)]"
    )

    for pageNumber in self.pageArray:
        urlQuery = self.query.format(pageNumber)
        resp = requests.get(self.URL + urlQuery)
        soup = BeautifulSoup(resp.content)

        links = soup.select("ul.bw-news-list li")
        dates = soup.findAll("time")

        for index, link in enumerate(links):
            lowered = link.getText().lower()
            # TODO: also match the company name against the reader entries.
            match = nasdaq_pattern.search(lowered)
            if not match:
                continue
            if "to present at" in lowered:
                continue
            if self.today not in str(dates[index]):
                continue

            wanted = match.group("symbol")
            anchors = link.findAll('a', href=True)
            paragraphs = link.findAll('p')

            for symbol in reader.requestArray:
                if symbol[0].lower() != wanted:
                    continue
                newLink = Link()
                newLink.symbol = symbol[0]
                newLink.url = anchors[0]['href']
                newLink.text = paragraphs[0].text
                newLink.linkText = anchors[0].text
                newLink.date = dates[index].text.strip()
                newLink.source = "BusinessWire"
                linksArray.append(newLink)

    return linksArray
def getLinks(self):
    """Collect today's MarketWired search results for tracked NASDAQ symbols.

    Opens one HTTP session, requests the site's front page, and then for
    every page number in ``self.pageArray`` requests ``self.URL1`` and
    appends ``self.query.format(pageNumber)`` to the URL that request
    ended up at.  Each result block is kept when:

    * its text contains a ``(NASDAQ: XXXX)`` style tag,
    * its text does not contain "to present at" (case-insensitive),
    * the tagged symbol is listed in ``reader.requestArray``,
    * the markup of the date span at the same position contains
      ``self.today``.

    Date spans are paired with result blocks purely by position.

    Returns:
        list: ``Link`` objects with ``symbol``, ``url`` (made absolute),
        ``text``, ``linkText``, ``date`` and ``source`` ("MarketWired")
        filled in.
    """
    linksArray = []

    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    nasdaq_pattern = re.compile(
        r"[(]\s?nasdaq(:| :|: | :|)\s?(?P<symbol>[a-z][a-z][a-z][a-z]?)\s?[)]"
    )

    # Close the session (and its pooled connections) when scraping is done.
    with requests.Session() as session:
        # The response is discarded; presumably this visit only primes the
        # session (e.g. cookies) -- TODO confirm.
        session.get("http://www.marketwired.com/")

        for pageNumber in self.pageArray:
            urlQuery = self.query.format(pageNumber)
            # resp.url is used rather than self.URL1 so that the query is
            # appended to the final URL of this request.
            resp = session.get(self.URL1)
            queryResp = session.get(resp.url + urlQuery)
            soup = BeautifulSoup(queryResp.content)

            links = soup.findAll("div", {"style": "margin-bottom: 30px;"})
            dates = soup.findAll(
                "span", {"style": "color: #888888; font-size: 9pt"}
            )

            for index, link in enumerate(links):
                lowered = link.getText().lower()
                match = nasdaq_pattern.search(lowered)
                if not match:
                    continue
                if "to present at" in lowered:
                    continue

                wanted = match.group("symbol")
                anchors = link.findAll('a', href=True)
                summaries = link.findAll(
                    'div', {"class": "search-results-width"}
                )

                for symbol in reader.requestArray:
                    if symbol[0].lower() != wanted:
                        continue
                    if self.today not in str(dates[index]):
                        continue
                    newLink = Link()
                    newLink.symbol = symbol[0]
                    newLink.url = "http://www.marketwired.com" + anchors[0]['href']
                    newLink.text = summaries[1].text
                    newLink.linkText = anchors[0].text
                    newLink.date = dates[index].text.strip()
                    newLink.source = "MarketWired"
                    linksArray.append(newLink)

    return linksArray