def record():
    """Build and return the item scraped from the MIT splash-page sample.

    Loads ``mit/test_splash.html`` through ``fake_response_from_file`` and
    feeds the resulting response to ``MITSpider.build_item``.

    Returns:
        The object produced by ``build_item``.

    Raises:
        AssertionError: if ``build_item`` produced a falsy result.
    """
    scraper = mit_spider.MITSpider()
    item = scraper.build_item(
        fake_response_from_file('mit/test_splash.html')
    )
    # Fail early here so dependants do not run against an empty item.
    assert item
    return item
def parsed_node():
    """Call ``parse_node`` on the list-page sample and return its first result.

    Loads ``mit/test_list.html``, extracts a node for the spider's
    ``itertag`` via ``get_node`` (with ``rtype="html"``) and passes it to
    ``MITSpider.parse_node``.

    Returns:
        The first item produced by the iterator that ``parse_node`` returns.

    Raises:
        StopIteration: if ``parse_node`` produces nothing.
    """
    spider = mit_spider.MITSpider()
    response = fake_response_from_file('mit/test_list.html')
    tag = spider.itertag
    node = get_node(spider, tag, response, rtype="html")
    # Use the built-in next(): the ``.next()`` method exists only on
    # Python 2 iterators, whereas next() works on Python 2.6+ and 3.
    return next(spider.parse_node(response, node))
def supervisors():
    """Return the item built from a record with multiple supervisors.

    The sample page contains a single ``dc.contributor.advisor`` row whose
    value names two people joined by "and"
    ("Seth Lloyd and J.D. Joannopoulos").

    Returns:
        The object produced by ``MITSpider.build_item`` for that page.
    """
    spider = mit_spider.MITSpider()
    # The document now ends with proper closing tags (</body> </html>);
    # the previous sample re-opened <body> and <html> instead of closing them.
    body = (
        ' <html> <body>'
        ' <tr class="ds-table-row odd ">'
        ' <td class="label-cell">dc.contributor.advisor</td>'
        ' <td>Seth Lloyd and J.D. Joannopoulos</td>'
        ' <td>en_US</td>'
        ' </tr>'
        ' </body> </html> '
    )
    response = fake_response_from_string(body)
    return spider.build_item(response)
def non_thesis():
    """Build an item from a Master's-level (M.Sc.) record.

    The sample page carries a single ``dc.description.degree`` row with the
    value ``M.Sc.``. Master's theses are not wanted, so the result of
    ``build_item`` is expected to be ``None``.

    Returns:
        Whatever ``MITSpider.build_item`` returns for that page.
    """
    # Adjacent literals concatenate to exactly the original sample string.
    degree_page = (
        ' <html> <body>'
        ' <tr class="ds-table-row odd ">'
        ' <td class="label-cell">dc.description.degree</td>'
        ' <td>M.Sc.</td>'
        ' <td>en_US</td>'
        ' </tr>'
        ' </body> </html> '
    )
    return mit_spider.MITSpider().build_item(
        fake_response_from_string(degree_page)
    )