Code example #1
	def put(self, projectKey, key):
		"""Update an existing issue; fields missing from the request keep
		their stored values."""
		issue = Issue().get(key)
		issue.project = Project().get(projectKey)
		issue.summary = self.request.get('summary') or issue.summary
		issue.text = self.request.get('text') or issue.text
		closed = self.request.get('closed')
		if closed:
			issue.closed = closed.lower() == "true"
		issue.put()
		# Return the issue's canonical URL both as a Location header and
		# as the JSON response body.
		url = issue.url(self.request.url)
		self.response.headers.add_header("Location", url)
		self.response.out.write(json(url))
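The handler receives projectKey and key as positional arguments, which in first-generation Google App Engine webapp routing come from regex capture groups in the URL pattern. A minimal wiring sketch, assuming that framework; the IssueHandler class name and the route pattern are hypothetical, and the json(...) helper (not shown in the excerpt) is presumed to serialize its argument:

	from google.appengine.ext import webapp

	class IssueHandler(webapp.RequestHandler):  # hypothetical name
		def put(self, projectKey, key):
			...  # body as in the example above

	app = webapp.WSGIApplication([
		# Capture groups are passed to the handler method as positional
		# arguments, in order: (projectKey, key).
		(r'/projects/(\w+)/issues/([\w-]+)', IssueHandler),
	])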
Code example #2
File: scrapers.py  Project: deansc/scraping_erudit
import re  # stdlib; used by get_pages below

class IssueScraper(Scraper):
    # Class-level default; get_issue_scrapers (example #3) replaces it with a
    # fresh Issue() per instance, since this single object would otherwise be
    # shared by every IssueScraper.
    model = Issue()

    def scrap_and_assign(self):
        # Drive every per-article scraper produced by get_articles_scrapers.
        for article_scraper in self.get_articles_scrapers():
            article_scraper.scrap_and_assign()

    def get_articles_scrapers(self):
        # The issue number, cassette prefix, and page title are the same for
        # every article in the listing, so compute them once up front.
        issue = self.get_issue_number()
        iss = self.extract_issue(issue)
        cassette_prefix = self.generate_cassette_prefix(
            volume=iss.get("volume"),
            number=iss.get("number"),
            year=iss.get("year"))
        self.model.title = self.get_title()

        for li in self._extract_articles_html():
            url = PathConfig.ERUDIT_PATH + li.h6.a["href"]
            article_title = li.h6.a.text.strip()
            # The last non-empty path segment identifies the article.
            article = [i for i in url.split("/") if i != ""][-1]
            cassette_name = cassette_prefix + "/" + article

            scraper = ArticleScraper(revue=self.revue,
                                     url=url,
                                     cassette_name=cassette_name)
            scraper.model = Article()  # give each scraper its own model
            self.model.articles.append(scraper.model)

            # Link the article back to its issue and record the metadata
            # already available from the listing page.
            scraper.model.issue = self.model
            scraper.model.pages = self.get_pages(li)
            scraper.model.title = article_title

            yield scraper

    def get_title(self):
        return self.soup.find("span", {"class": "theme-title"}).text.strip()

    def get_issue_number(self):
        return self.soup.find("span", {"class": "issue-number"}).text

    def get_pages(self, li):
        """
        Return the article's page range, or None when the record lists
        no pages.
        """
        # REGEXP_PAGES is presumably defined on the Scraper base class;
        # it is not part of this excerpt.
        pages_p = li.find("p", {"class": "bib-record__pages"})
        pages = re.findall(self.REGEXP_PAGES, pages_p.text)
        return next(iter(pages), None)

    def _extract_articles_html(self):
        # Each <li class="bib-record"> holds one article's listing entry.
        return self.soup.find_all("li", {"class": "bib-record"})
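
A minimal usage sketch for IssueScraper, constructed the way example #3 below does it; the revue object, URL path, and cassette name here are placeholders, not values from the project:

    scraper = IssueScraper(revue=revue,
                           url=PathConfig.ERUDIT_PATH + "/path/to/issue",
                           cassette_name="revue/v12-n3-2001")
    scraper.model = Issue()       # fresh model per scraper, as in example #3
    scraper.scrap_and_assign()    # scrapes every article on the issue page
    print(scraper.model.title, len(scraper.model.articles))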
Code example #3
File: scrapers.py  Project: deansc/scraping_erudit
    def get_issue_scrapers(self):
        for li in self._extract_issues_html():
            url = PathConfig.ERUDIT_PATH + li.a["href"]
            issue = li.span.text

            # extract_issue parses the volume/number/year out of the label.
            iss = self.extract_issue(issue)
            cassette_name = self.generate_cassette_prefix(
                volume=iss.get("volume"),
                number=iss.get("number"),
                year=iss.get("year"))

            scraper = IssueScraper(revue=self.revue,
                                   url=url,
                                   cassette_name=cassette_name)
            scraper.model = Issue()  # give each scraper its own model
            self.model.issues.append(scraper.model)

            # Link the issue back to its revue and record its metadata.
            scraper.model.revue = self.model
            scraper.model.volume = iss.get("volume")
            scraper.model.number = iss.get("number")
            scraper.model.year = iss.get("year")

            yield scraper
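
Together, examples #2 and #3 form a two-level generator pipeline: the revue-level method above yields one IssueScraper per issue, and each IssueScraper in turn yields one ArticleScraper per article. A minimal sketch of driving the whole pipeline, assuming a revue-level scraper object named revue_scraper (the name is a placeholder; only its get_issue_scrapers method appears above):

    for issue_scraper in revue_scraper.get_issue_scrapers():
        # scrap_and_assign (example #2) drives every ArticleScraper
        # produced for this issue.
        issue_scraper.scrap_and_assign()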