Example #1
	def getLinks(self):
		r = requests.get(self.URL1)

		soup = BeautifulSoup(r.content, "html.parser")

		linksArray = []
		nasdaqArray = []
		index = 0

		links = soup.findAll("ul", {"class": "list-links"})
		dates = soup.findAll("div", {"class": "col-sm-3 col-md-2"})
		#dates = soup.findAll("p", {"class": "date"})
		#timePublished = soup.findAll("p", {"class": "time"})
		buttons = soup.findAll("div", {"class": "btn-group"})
		# Drop the button groups so their text does not leak into link.getText().
		for button in buttons:
			button.extract()

		for link in links:
			title = link.findAll('a', href=True)
			text = link.findAll('li')

			# Look for a "(NASDAQ: XXXX)" ticker mention in the item text.
			match = re.search(r"[(]\s?nasdaq(:| :|: |)\s?(?P<symbol>[a-z]{3,4})\s?.*?[)]", link.getText().lower())
			if match and match.group("symbol"):
				# Skip "to Present at ..." conference announcements.
				match2 = re.search("to Present at", link.getText())
				if not match2:
					for symbol in reader.requestArray:
						if symbol[0].lower() == match.group("symbol"):
							# Accept the item when its date string has no comma,
							# or when it explicitly contains today's date.
							if "," not in str(dates[index]) or self.today in str(dates[index]):
								newLink = Link()
								newLink.symbol = symbol[0]
								newLink.url = title[0]['href']
								newLink.text = text[1].text
								newLink.linkText = title[0].text
								newLink.date = dates[index].text.strip()
								newLink.source = "PrNewswire"
								linksArray.append(newLink)

			index += 1

		return linksArray
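All four examples lean on the same surrounding module: requests, BeautifulSoup and re imports, a Link container that is filled field by field, and a reader.requestArray of tracked tickers where symbol[0] is the symbol. None of that context appears on this page, so the sketch below is only an assumption about its shape, not the original definitions.

import re
import requests
from bs4 import BeautifulSoup


class Link(object):
	"""Assumed plain container for one scraped press release."""
	def __init__(self):
		self.symbol = ""
		self.url = ""
		self.text = ""
		self.linkText = ""
		self.date = ""
		self.source = ""


class reader(object):
	# Assumed shape: each entry starts with the ticker symbol, because the
	# snippets compare symbol[0].lower() against the regex capture.
	requestArray = [("ACME", "Acme Therapeutics"), ("EXMP", "Example Corp")]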
Example #2
	def getLinks(self):
		s = requests.Session()
		r = s.get(self.url)
		soup = BeautifulSoup(r.content, "html.parser")
		linksArray = []

		items = soup.findAll('item')
		# These selections run against the whole feed, not the current item,
		# so do them once before the loop instead of on every iteration.
		title = soup.select('item title')
		description = soup.select('item description')
		link = soup.select('item link')

		index = 0

		for item in items:
			# Look for a "(NASDAQ: XXXX)" ticker mention in the feed item.
			match = re.search(r"[(]\s?nasdaq(:| :|: |)\s?(?P<symbol>[a-z]{3,4})\s?[)]", item.getText().lower())
			if match and match.group("symbol"):
				for symbol in reader.requestArray:
					if symbol[0].lower() == match.group("symbol"):
						newLink = Link()
						newLink.symbol = symbol[0]
						newLink.url = link[index].text
						newLink.text = description[index].text
						newLink.linkText = title[index].text
						#newLink.date = dates[index].text.strip()
						newLink.source = "GlobeNewswire"
						linksArray.append(newLink)
			index += 1

		return linksArray
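The same NASDAQ-ticker regular expression drives all four scrapers: it allows an optional colon, optionally padded by a single space on one side, between "nasdaq" and a 3-4 letter lowercase symbol. A small standalone check with invented headlines shows what it captures:

import re

pattern = r"[(]\s?nasdaq(:| :|: |)\s?(?P<symbol>[a-z]{3,4})\s?[)]"

samples = [
	"acme therapeutics (nasdaq: acme) reports results",  # matches -> "acme"
	"example corp (nasdaq:exmp) announces offering",     # matches -> "exmp"
	"other co (nyse: oth) declares dividend",            # no match: different exchange
]

for headline in samples:
	match = re.search(pattern, headline.lower())
	print(headline, "->", match.group("symbol") if match else None)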
Example #3
	def getLinks(self):
		linksArray = []

		for pageNumber in self.pageArray:
			urlQuery = self.query.format(pageNumber)
			#print(self.URL + urlQuery)
			resp = requests.get(self.URL + urlQuery)
			soup = BeautifulSoup(resp.content, "html.parser")

			index = 0

			ul = soup.findAll("ul", {"class": "bw-news-list"})
			links = soup.select("ul.bw-news-list li")
			dates = soup.findAll("time")
			headlyne = soup.select("ul.bw-news-list h3")
			summaries = soup.select("ul.bw-news-list p")

			for link in links:
				title = link.findAll('a', href=True)
				text = link.findAll('p')
				#match1 = re.search("")  # to match the company name with symbol[0] from reader
				# Look for a "(NASDAQ: XXXX)" ticker mention in the list item.
				match = re.search(r"[(]\s?nasdaq(:| :|: |)\s?(?P<symbol>[a-z]{3,4})\s?[)]", link.getText().lower())
				if match and match.group("symbol"):
					# Skip "to present at ..." conference announcements and
					# anything not dated today.
					match2 = re.search("to present at", link.getText().lower())
					if not match2 and self.today in str(dates[index]):
						for symbol in reader.requestArray:
							if symbol[0].lower() == match.group("symbol"):
								newLink = Link()
								newLink.symbol = symbol[0]
								newLink.url = title[0]['href']
								newLink.text = text[0].text
								newLink.linkText = title[0].text
								newLink.date = dates[index].text.strip()
								newLink.source = "BusinessWire"
								linksArray.append(newLink)
				index += 1

		return linksArray
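The date filter in this example assumes that dates[index] (every <time> tag on the page) lines up one-to-one with links (every li inside ul.bw-news-list). A self-contained check with invented markup makes that pairing explicit:

from bs4 import BeautifulSoup

# Invented markup in the shape the BusinessWire snippet expects:
# exactly one <time> per <li>, so dates[index] pairs with links[index].
html = """
<ul class="bw-news-list">
  <li><time>February 12, 2026</time><h3><a href="/news/1">Acme (NASDAQ: ACME) wins contract</a></h3><p>Summary one.</p></li>
  <li><time>February 11, 2026</time><h3><a href="/news/2">Example Corp quarterly update</a></h3><p>Summary two.</p></li>
</ul>
"""

soup = BeautifulSoup(html, "html.parser")
links = soup.select("ul.bw-news-list li")
dates = soup.findAll("time")

today = "February 12, 2026"
for index, link in enumerate(links):
	if today in str(dates[index]):
		print(link.find("a")["href"], dates[index].text.strip())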
Example #4
	def getLinks(self):
		s = requests.Session()
		# Warm up the session (cookies) with a request to the front page.
		dummyResp = s.get("http://www.marketwired.com/")
		linksArray = []

		for pageNumber in self.pageArray:
			urlQuery = self.query.format(pageNumber)
			# Follow any redirect on the base URL first, then append the page
			# query to the final resolved URL.
			resp = s.get(self.URL1)
			queryResp = s.get(resp.url + urlQuery)

			soup = BeautifulSoup(queryResp.content, "html.parser")
			links = soup.findAll("div", {"style": "margin-bottom: 30px;"})
			dates = soup.findAll("span", {"style": "color: #888888; font-size: 9pt"})

			index = 0

			for link in links:
				title = link.findAll('a', href=True)
				text = link.findAll('div', {"class": "search-results-width"})

				# Look for a "(NASDAQ: XXXX)" ticker mention in the result block.
				match = re.search(r"[(]\s?nasdaq(:| :|: |)\s?(?P<symbol>[a-z]{3,4})\s?[)]", link.getText().lower())
				if match and match.group("symbol"):
					# Skip "to present at ..." conference announcements.
					match2 = re.search("to present at", link.getText().lower())
					if not match2:
						for symbol in reader.requestArray:
							if symbol[0].lower() == match.group("symbol"):
								if self.today in str(dates[index]):
									newLink = Link()
									newLink.symbol = symbol[0]
									newLink.url = "http://www.marketwired.com" + title[0]['href']
									newLink.text = text[1].text
									newLink.linkText = title[0].text
									newLink.date = dates[index].text.strip()
									newLink.source = "MarketWired"
									linksArray.append(newLink)
				index += 1

		return linksArray
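The dummy request to the front page exists so the Session picks up the site's cookies before the search pages are fetched. A minimal illustration of that Session behaviour, using httpbin.org purely as a stand-in endpoint:

import requests

s = requests.Session()
# The first request stores the cookie on the session ...
s.get("https://httpbin.org/cookies/set?visited=1")
# ... and every later request through the same session sends it back.
resp = s.get("https://httpbin.org/cookies")
print(resp.json())  # {'cookies': {'visited': '1'}}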