def gather_games():
    """Import every downloaded SGF file into the database.

    Successfully imported files are removed from disk; files that fail to
    load are reported and left in place so the import can be retried.
    Prints a summary count of newly added files when done.
    """
    db = DBsgf()
    num = 0
    for f, info in downloaded_sgf_files():
        try:
            db.add(game_item(f, info))
            os.remove(f)
            num += 1
        except Exception as err:
            print("error while loading file: '%s':" % f)
            # Fix: Exception.message does not exist in Python 3 (it would
            # raise AttributeError inside this handler); printing the
            # exception object works on both Python 2 and 3.
            print(err)
    print("%d new files added" % num)
def parse(self, response):
    """Parse an igokisen news overview page.

    Builds an IgokisenNewsItem from every table row (the first row — the
    header — is skipped). Items not yet present in the DB are stored, and
    a follow-up request for the item's tournament-games page is yielded.

    NOTE(review): source formatting was lost; the ``url``/``yield`` lines
    are assumed to sit inside the ``if not db.exists(item)`` branch so
    only previously unseen items are crawled — confirm against the
    original file.
    """
    db = DBsgf()
    # the page lists dates without a year, so determine it once per page
    this_year = self.get_year(response)
    # all tournament links:
    # response.xpath('//tbody//a/@href').extract()
    #
    for selection in response.xpath('//tbody//tr')[1:]:
        item = IgokisenNewsItem(this_year).parse(selection)
        if not db.exists(item):
            db.add(item)
            url = response.urljoin(item['link'])
            yield scrapy.Request(url, callback=self.parseTournamentGames)
def testIgokisenNewsParsing(self):
    """Parsing the fixture news page must store 48 DBNewsItem rows and
    populate all fields of a sample item as expected."""
    results = self.spider.parse(fake_response_from_file('Go_Topics.html'))
    # parse() is a generator; drive it so every item is processed.
    # Fix: generator.next() is Python-2-only — Python 3 uses next(gen).
    # there should be 48 items
    for x in range(48):
        next(results)
    dbitems = DBsgf().session.query(DBNewsItem).order_by(DBNewsItem.date).all()
    self.assertEqual(len(dbitems), 48)
    # spot-check one item (sorted by date, so index 7 is deterministic)
    item = dbitems[7]
    self.assertEqual(item.date.strftime('%Y-%m-%d'), '2015-04-02')
    self.assertEqual(item.game, 'GS Caltex Cup')
    self.assertEqual(item.link, 'file:///var/folders/08/1yh0yp1955z8rg6jdhrps2vw0000gn/T/kr/gs.html')
    self.assertEqual(item.nation, 'Korea')
    self.assertEqual(item.site, 'igokisen')
def setupTestDB():
    """Prepare a clean test database: configure the test environment,
    then drop the contents of every table."""
    setupTestEnviron()
    DBsgf()._deleteAllTables()