def handle(self, *args, **options):
        """Scrape published papers per year, with resumable progress.

        Restores a pickled ``{year: [publications]}`` progress map if one
        exists, fetches papers for each year not yet covered, fills in the
        details for every publication, then runs the NIH scrape per year.
        On any failure, ``self.abort()`` re-persists the progress map
        before the exception propagates.
        """
        # {year: [publications]} -- progress map, possibly restored below.
        publications = {}
        if os.path.isfile(PROGRESS_FILE):
            # Load our previous progress, if any.
            # NOTE(review): pickle.load is unsafe if PROGRESS_FILE could be
            # written by an untrusted party -- assumed trusted here.
            with open(PROGRESS_FILE, 'rb') as progress_file:
                publications = pickle.load(progress_file)
            # Remove it for now --
            # if anything goes wrong, abort() will save it again!
            os.remove(PROGRESS_FILE)

        # Hoisted so both loops agree even if the date rolls over mid-run.
        # NOTE(review): range() excludes the current year itself -- confirm
        # that skipping the in-progress year is intentional.
        current_year = date.today().year
        for year in range(2007, current_year):
            if year not in publications:
                self.stdout.write("GETTING PAPERS FOR YEAR " + str(year))
                try:
                    publications[year] = \
                        list(scholar_data.get_published_papers(year, year))
                except BaseException:
                    # Persist progress before re-raising; BaseException
                    # (unlike Exception) also covers KeyboardInterrupt, so
                    # Ctrl-C doesn't lose the progress map.
                    self.abort(publications)
                    raise

        self.stdout.write("ALL PAPERS GOTTEN, FILLING INFO FOR EACH")
        for year in publications:
            # Iterate a copy so handled entries can be removed from the
            # live list, keeping the progress map accurate for abort().
            for publication in publications[year][:]:
                try:
                    self.handle_publication(publication, year)
                    publications[year].remove(publication)
                except BaseException:
                    self.abort(publications)
                    raise
        self.stdout.write('\n' * 4)
        self.stdout.write('#' * 10)
        self.stdout.write('\n' * 4)

        for year in range(2007, current_year):
            nih_data.scrape(str(year))
    def test_scraping(self):
        """Integration test: dummy publications flow through
        ``scrape.Command.handle_publication`` and ``nih_data.scrape`` into
        the Paper/Author models, including edge cases with missing fields.
        """
        p1 = DummyPublication(
            'Hats and Stuff', 63, 'Hats, Bob and Hats, Billy',
            url='example.com',
            abstract='About hats and stuff',
            year=1992,
            volume=1)
        p2 = DummyPublication(
            'Hats and Other Stuff', 36, 'Tanzi, Rudolph',
            url='example.com',
            abstract='More about hats and stuff',
            year=2015,
            volume=1)
        # Edge case: no abstract and no URL.
        p3 = DummyPublication(
            'No Abstract or website: Reviewed', 0, 'Bob, Billy',
            year=2015)
        # Edge case: no year on the publication itself; the year is passed
        # to handle_publication instead.
        p4 = DummyPublication(
            'No Year: Revisited', 12, 'Bob, Billy and others',
            url='example.com', journal='Journal of Science')
        command = scrape.Command()
        command.handle_publication(p1, 1992)
        command.handle_publication(p2, 2015)
        command.handle_publication(p3, 2015)
        command.handle_publication(p4, 2014)
        nih_data.scrape('1992')
        nih_data.scrape('2015')

        # BUG FIX: the original `for paper in ...` loop passed vacuously
        # when no row matched citations=36 -- assert the match count first
        # so a silent scrape failure cannot slip through.
        papers = Paper.objects.filter(citations=36)
        self.assertEqual(len(papers), 1)
        for paper in papers:
            self.assertEqual(paper.title, 'Hats and Other Stuff')
            self.assertEqual(paper.year, 2015)
        query = Paper.objects.filter(url='example.com')
        self.assertEqual(len(query), 3)
        # 'and others' must not create a literal Author named 'others'.
        query = Author.objects.filter(name='others')
        self.assertEqual(len(query), 0)
        query = Paper.objects.filter(journal='Journal of Science')
        self.assertEqual(len(query), 1)
 def handle(self, *args, **options):
     """Run the NIH scrape for each year from 2007 up to, but not
     including, the current year.

     NOTE(review): range() excludes date.today().year itself -- confirm
     that skipping the in-progress year is intentional.
     """
     for year_str in map(str, range(2007, date.today().year)):
         nih_data.scrape(year_str)