class GrandrtSpider(scrapy.Spider): name = 'glandrt' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=GLANDRT&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=GLANDRT&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=GLANDRT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'GLANDRT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'เดอะไนน์ ทาวเวอร์ส The Nine Towers ยูนิลีเวอร์เฮ้าส์ Unilever House' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[60].strip() # Establishment Date self.item['establishmentDate'] = '12/4/2017 ' # Registration Date self.item['registrationDate'] = '21/04/2560' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class BofficeSpider(scrapy.Spider): name = 'boffice' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=BOFFICE&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=BOFFICE&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=BOFFICE&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'BOFFICE' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โครงการอาคารสำนักงานภิรัชทาวเวอร์แอทเอ็มควอเทียร์ ภิรัชทาวเวอร์แอทเอ็มควอเทียร์ ตึกภิรัช อาคารภิรัช ภิรัช Bhiraj Tower' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน กสิกรไทย จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[54].strip() # Establishment Date self.item['establishmentDate'] = '15/01/2018' # Registration Date self.item['registrationDate'] = '23/01/2561' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class CpnreitSpider(scrapy.Spider): name = 'cpnreit' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=CPNREIT&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=CPNREIT&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=CPNREIT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'CPNREIT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'เซ็นทรัล CentralPlaza เซ็นทรัลพลาซาลาดพร้าว CentralPlaza Ladprao เซ็นทรัลพลาซารามอินทรา CentralPlaza Ramindra เซ็นทรัลมารีนา CentralPlaza Marina เซ็นทรัลพลาซาปิ่นเกล้า CentralPlaza Pinklao เซ็นทรัลพลาซาเชียงใหม่แอร์พอร์ต CentralPlaza Chiang Mai Airport เซ็นทรัลพลาซาพระราม 3 CentralPlaza Rama 3 เซ็นทรัลพลาซาบางนา CentralPlaza Bangna เซ็นทรัลพลาซาพระราม 2 CentralPlaza Rama 2 เซ็นทรัลพลาซารัตนาธิเบศร์ CentralPlaza Rattanathibet เซ็นทรัลเวิลด์ CentralPlaza World เซ็นทรัลพลาซาแจ้งวัฒนะ CentralPlaza Chaengwattana เซ็นทรัลพลาซาขอนแก่น CentralPlaza KhonKaen เซ็นทรัลพลาซาชลบุรี CentralPlaza Chonburi เซ็นทรัลพลาซาอุดรธานี CentralPlaza UdonThani เซ็นทรัลเฟสติวัลพัทยาบีช CentralPlaza Festival Pattaya Beach เซ็นทรัลพลาซาแกรนด์พระราม 9 CentralPlaza Grand Rama 9 เซ็นทรัลพลาซาพิษณุโลก CentralPlaza Phitsanulok เซ็นทรัลพลาซาเชียงราย CentralPlaza Chiang Rai CentralPlaza เซ็นทรัลพลาซาลำปาง Lampang เซ็นทรัลพลาซาสุราษฎร์ธานี CentralPlaza Suratthani เซ็นทรัลเฟสติวัลหาดใหญ่ CentralPlaza HatYai เซ็นทรัลเฟสติวัลเชียงใหม่ CentralPlaza Chiang Mai เซ็นทรัลพลาซาอุบลราชธานี CentralPlaza Ubon Ratchathani เซ็นทรัลพลาซาศาลายา CentralPlaza Salaya เซ็นทรัลเฟสติวัลสมุย CentralPlaza Samui เซ็นทรัลเฟสติวัลอีสต์วิลล์ CentralPlaza Eastville เซ็นทรัลพลาซาเวสต์เกต CentralPlaza Westgate เซ็นทรัลเฟสติวัลภูเก็ต CentralPlaza Phuket เซ็นทรัลพลาซาระยอง CentralPlaza Rayong เซ็นทรัลพลาซานครศรีธรรมราช CentralPlaza NakhonSiThammarat เซ็นทรัลพลาซามหาชัย CentralPlaza Mahachai เซ็นทรัลพลาซานครราชสีมา CentralPlaza NakhonRatchasima เซ็นลาด เซ็นทรัลลาดพร้าว ลาดพร้าว เซ็นราม เซ็นทรัลรามอินทรา เซ็นมารีนา เซ็นทรัลเซ็นเตอร์พัทยา เซ็นปิ่น เซ็นทรัลปิ่นเกล้า เซ็นแอร์พอร์ต เซ็นทรัลแอร์พอร์ต เซ็นทรัลเชียงใหม่แอร์พอร์ต เซ็นพระราม 3 เซ็นทรัลพระราม 3 เซ็นบางนา เซ็นทรัลบางนา เซ็นพระราม 2 เซ็นทรัลพระราม 2 เซ็นรัตนา เซ็นทรัลรัตนา เซ็นรัตนาธิเบศร์ เซ็นทรัลรัตนาธิเบศร์ เซ็นเวิลด์ เซ็นทรัลเวิลด์ เซ็นแจ้ง เซ็นทรัลแจ้ง เซ็นทรัลเเจ้งวัฒนะ แจ้งวัฒนะ เซ็นทรัลขอนแก่น เซ็นชลบุรี เซ็นทรัลชล เซ็นทรัลชลบุรี เซ็นอุดร เซ็นทรัลอุดรธานี เซ็นบีช เซ็นพัทยาบีช เซ็นทรัลบีช เซ็นทรัลพัทยาบีช เซ็นพระราม 9 เซ็นทรัลพระราม9 เซ็นทรัลแกรนด์พระราม 9 เซ็นพิษณุโลก เซ็นทรัลพิษณุโลก เซ็นทรัลเชียงราย เซ็นทรัลลำปาง เซ็นทรัลสุราษฏร์ เซ็นทรัลสุราษฏร์ธานี เซ็นหาดใหญ๋ เซ็นทรัลเฟสติวัลหาดใหญ่ เซ็นเฟสเชียงใหม่ เซ็นทรัลเฟสติวัลเชียงใหม่ เซ็นอุบล เซ็นทรัลอุบล เซ็นทรัลอุบลราชธานี เซ็นศาลายา เซ็นทรัลศาลายา เซ็นเฟสสมุย เซ็นทรัลเฟสติวัลสมุย เซ็นอีสต์วิลล์ เซ็นทรัลอีสต์วิลล์ เซ็นทรัลเฟสติวัลอีสต์วิลล์ เซ็นเวสต์เกต เซ็นทรัลเวสต์เกต เซ็นบางใหญ่ เซ็นทรัลบางใหญ่ เซ็นเฟสภูเก็ต เซ็นภูเก็ต เซ็นทรัลภูเก็ต เซ็นทรัลเฟสติวัลภูเก็ต เซ็นทรัลระยอง เซ็นทรัลคอน เซ็นทรัลนคร เซ็นทรัลนครศรี เซ็นทรัลนครศรีธรรมราช เซ็นมหาชัย เซ็นทรัลมหาชัย เซ็นนครราชศรีมา เซ็นทรัลนครราชศรีมา ' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัท หลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[60].strip() # Establishment Date self.item['establishmentDate'] = '29/11/2017' # Registration Date self.item['registrationDate'] = '14/12/2560' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class AimirtSpider(scrapy.Spider): name = 'aimirt' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=AIMIRT&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=AIMIRT&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=AIMIRT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'AIMIRT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โครงการแปซิฟิคห้องเย็น แปซิฟิคห้องเย็น ห้องเย็นแปซิฟิค Pacific โครงการเจดับเบิ้ลยูดีแปซิฟิค เจดับเบิ้ลยูดีแปซิฟิค JWD โครงการดาต้าเซฟ ดาต้าเซฟ DATASAFE โครงการทิพย์ 7 ทิพย์ TIP' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[56].strip() # Establishment Date self.item['establishmentDate'] = '22/12/2017' # Registration Date self.item['registrationDate'] = '09/01/2561' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class LhhotelSpider(scrapy.Spider): name = 'lhhotel' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=LHHOTEL&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=LHHOTEL&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=LHHOTEL&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'LHHOTEL' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โรงแรมแกรนด์เซนเตอร์พอยต์เทอร์มินอล 21 โรงแรมแกรนด์เซนเตอร์พอยต์เทอร์มินอล โรงแรมแกรนด์ เซนเตอร์พอยต์ เทอร์มินอล 21 Grand Center Point Terminal 21 Hotel โรงแรมแกรนด์เซนเตอร์พอยต์ราชดำริ โรงแรมแกรนด์ เซนเตอร์พอยต์ ' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[60].strip() # Establishment Date self.item['establishmentDate'] = '11/12/2015' # Registration Date self.item['registrationDate'] = '22/12/2558' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class SripanwaSpider(scrapy.Spider): name = 'sripanwa' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=SRIPANWA&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=SRIPANWA&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=SRIPANWA&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'SRIPANWA' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โรงแรมศรีพันวาภูเก็ต โรงแรม ศรีพันวา Sri Panwa Phuket Luxury Pool Villa Hotel บาบาบีชคลับหัวหิน บาบาบีชคลับ Baba Beach Club' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[58].strip() # Establishment Date self.item['establishmentDate'] = '06/12/2016' # Registration Date self.item['registrationDate'] = '23/12/2559' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class HreitSpider(scrapy.Spider): name = 'hreit' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=HREIT&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=HREIT&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=HREIT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'HREIT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'นิคมอุตสาหกรรม ดับบลิวเอชเอ อีสเทิร์น ซีบอร์ด นิคมอุตสาหกรรมดับบลิวเอชเอ อีสเทิร์นซีบอร์ด นิคมอุตสาหกรรมดับบลิวเอชเออีสเทิร์น เขตประกอบอุตสาหกรรม SIL โลจิสติกส์พาร์ค Logistics Park' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน ไทยพาณิชย์ จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[58].strip() # Establishment Date self.item['establishmentDate'] = '21/11/2016' # Registration Date self.item['registrationDate'] = '28/11/2559' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class GvreitSpider(scrapy.Spider): name = 'gvreit' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=GVREIT&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=GVREIT&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=GVREIT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'GVREIT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โครงการปาร์ค เวนเชอร์เจอร์ อีโคเพล็กซ์ ปาร์คเวนเชอร์ ปาร์คเวนเจอร์ อีโคเพล็กซ์ โครงการปาร์คเวนเชอร์ โครงการปาร์คเวนเจอร์ Park Ventures Ecoplex สารทรสแควร์ Sathorn Square' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน กสิกรไทย จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[58].strip() # Establishment Date self.item['establishmentDate'] = '22/3/2016' # Registration Date self.item['registrationDate'] = '04/04/2559' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class DreitSpider(scrapy.Spider): name = 'dreit' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=DREIT&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=DREIT&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=DREIT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'DREIT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'โรงแรมดุสิตธานีลากูน่า โรงแรมดุสิตธานี Dusit Thani Laguna Phuket โรงแรมดุสิต โรงแรมดุสิตธานีหัวหิน โรงแรมดุสิตธานีดีทู' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัท หลักทรัพย์จัดการกองทุน กรุงไทย จำกัด (มหาชน)' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[56].strip() # Establishment Date self.item['establishmentDate'] = '29/11/2017' # Registration Date self.item['registrationDate'] = '15/12/2560' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class WhartSpider(scrapy.Spider): name = 'whart' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=WHART&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=WHART&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=WHART&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'WHART' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'คลังสินค้าลาดกระบัง WHA Lat Krabang Warehouse เมกะโลจิสติกส์เซ็นเตอร์ชลหารพิจิตรกม Mega Logistics Center Chonlaharnpichit' # Trustee Map To Elasticsearch self.item['trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุนกสิกรไทย จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[62].strip() # Establishment Date self.item['establishmentDate'] = '8/12/2014 ' # Registration Date self.item['registrationDate'] = '18/12/2557' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class WhabtSpider(scrapy.Spider): name = 'whabt' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=WHABT&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=WHABT&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=WHABT&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'WHABT' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'อาคารเอสเจ ตึกเอสเจ เอสเจ อินฟินิท วันบิสซิเนส SJ Infinite One Business Complex อาคารบางนา ตึกบางนา บิสซิเนสคอมเพล็กซ์ ' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน กรุงไทย จำกัด (มหาชน)' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[58].strip() # Establishment Date self.item['establishmentDate'] = '4/11/2015' # Registration Date self.item['registrationDate'] = '16/11/2558' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item
class AmatarSpider(scrapy.Spider): name = 'amatar' urls = [ 'https://www.set.or.th/set/companyprofile.do?symbol=AMATAR&ssoPageId=4&language=th&country=TH', 'https://www.set.or.th/set/companyprofile.do?symbol=AMATAR&ssoPageId=4&language=en&country=TH', 'https://marketdata.set.or.th/mkt/stockquotation.do?symbol=AMATAR&ssoPageId=1&language=th&country=TH' ] item = ReitItem() one = False two = False three = False def start_requests(self): for url in self.urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): if (response.url == self.urls[0]): print('Loop 0') # Symbol self.item['symbol'] = 'AMATAR' # Nickname Map To Elasticsearch self.item[ 'nickName'] = 'นิคมอุตสาหกรรมอมตะนคร Amata Nakorn Industrial นิคมอุตสาหกรรมอมตะซิตี้ Amata City' # Trustee Map To Elasticsearch self.item[ 'trustee'] = 'บริษัทหลักทรัพย์จัดการกองทุน กสิกรไทย จำกัด' # Trust Name TH self.item['trustNameTh'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() # Address self.item['address'] = response.css( "tr td div.row div.col-md-9::text").getall()[1].strip() # Url self.item['url'] = response.css( "td div.row div.col-xs-12 a::text").getall()[0].strip() # Reit Manager self.item['reitManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[4].strip() # Property Manager self.item['propertyManager'] = response.css( "tr td div.row div.col-md-9::text").getall()[5].strip() # Investment Policy self.item['investmentPolicy'] = response.css( "tr td div.row div.col-md-9::text").getall()[6].strip() # Pe Value self.item['peValue'] = response.css( "td div.row div.col-xs-12 div::text").getall()[1].strip() # Par/NAV self.item['parNAV'] = response.css( "td div.row div.col-xs-12 div::text").getall()[5].strip() # Dvd/Yield self.item['dvdYield'] = response.css( "td div.row div.col-xs-12 div::text").getall()[3].strip() # Investment Amount self.item['investmentAmount'] = response.css( "td div.row div.col-xs-9::text").getall()[9].strip() # Policy self.item['policy'] = response.css( "td div.row div.col-xs-12::text").getall()[60].strip() # Establishment Date self.item['establishmentDate'] = '16/6/2015' # Registration Date self.item['registrationDate'] = '30/06/2558' self.one = True if (response.url == self.urls[1]): print('Loop 1') # Trust Name TH self.item['trustNameEn'] = response.css( "tr td div.row div.col-md-9::text").getall()[0].strip() self.two = True if (response.url == self.urls[2]): print('Loop 2') # Price of Day price_of_day = response.css("tbody")[0].css("tr")[1].css( "td::text").getall() if len(price_of_day) == 2: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[1].get().strip() elif len(price_of_day) == 3: self.item['priceOfDay'] = response.css("tbody")[0].css( "tr")[1].css("td::text")[2].get().strip() # Max Price of Day self.item['maxPriceOfDay'] = response.css("tbody")[0].css( "tr")[6].css('td::text')[1].get().strip() # Min Price of Day self.item['minPriceOfDay'] = response.css("tbody")[0].css( "tr")[7].css('td::text')[1].get().strip() # Par Value self.item['parValue'] = response.css("tbody")[1].css("tr")[0].css( "td::text")[1].get().strip() # Ceiling Value self.item['ceilingValue'] = response.css("tbody")[1].css( "tr")[1].css("td::text")[1].get().strip() # Foor Value self.item['floorValue'] = response.css("tbody")[1].css( "tr")[2].css("td::text")[1].get().strip() self.three = True print("End Loop") if (self.one == True) and (self.two == True) and (self.three == True): yield self.item