Exemple #1
0
    def run(self):
        """Update stored articles from every source, then run post-processing.

        Steps, in order:

        1. Run the medRxiv/bioRxiv, arXiv, Elsevier and Pubmed update tasks,
           passing ``self.force_update`` and the per-source count, and report
           progress after each one (15, 30, 45, 60).
        2. If ``settings.UPDATE_VECTORIZER`` is truthy, launch the four
           'search' service tasks one after another with ``block=True``
           (progress 70, 80, 90, 95); otherwise log that the step is skipped.
        3. Run ``GeoParserTask``.

        Every sub-task receives this task's ``started_by`` value.
        """
        started_by = self._task.started_by

        # (label used in log messages, task class, article count,
        #  progress value reported once the source is done)
        article_sources = (
            ("medRxiv/bioRxiv", MedBiorxivUpdateTask, self.medrxiv_count, 15),
            ("arXiv", ArxivUpdateTask, self.arxiv_count, 30),
            ("Elsevier", ElsevierUpdateTask, self.elsevier_count, 45),
            ("Pubmed", PubmedUpdateTask, self.pubmed_count, 60),
        )
        for label, task_cls, count, progress_done in article_sources:
            self.log("Updating {} articles...".format(label))
            TaskRunner.run_task(task_cls,
                                force_update=self.force_update,
                                count=count,
                                started_by=started_by)
            # Log completion before reporting progress so that every source
            # emits its messages in the same order.
            self.log("Finished updating {} articles...".format(label))
            self.progress(progress_done)

        if settings.UPDATE_VECTORIZER:
            self.log("Updating Topic assigment...")
            task_launcher = get_task_launcher('search')

            # The same config dict is handed to every launched task.
            task_config = {
                'service': 'search',
                'parameters': [],
                'started_by': started_by
            }

            # (task name, completion log message, progress value);
            # the log message texts are kept verbatim.
            search_steps = (
                ("setup-vectorizer",
                 "Finished setup-vectorizer", 70),
                ("update-category-assignment",
                 "Finished updating category assigment", 80),
                ("nearest-neighbor-topic-assignment",
                 "Finished nearest-neighbor-topic-assignment", 90),
                ("reduce-embedding-dimensionality",
                 "Finished reduce-embedding-dimensionality", 95),
            )
            for task_name, finished_message, progress_done in search_steps:
                task_launcher.launch_task(name=task_name,
                                          config=task_config,
                                          block=True)
                self.log(finished_message)
                self.progress(progress_done)
        else:
            self.log("Paper matrix update and topic assignment skipped.")

        self.log("Extract locations from papers...")
        TaskRunner.run_task(GeoParserTask, started_by=started_by)
        self.log("Finished extracting locations from papers")
Exemple #2
0
# Script entry point: the body only runs when this file is executed directly.
if __name__ == '__main__':
    import django
    # Set Django up first; the project imports below are deliberately placed
    # after this call (presumably they require Django to be configured --
    # keep this order).
    django.setup()

    from scrape.task_check_covid_related import CheckCovidRelatedTask
    from tasks.task_runner import TaskRunner

    # Run the task via the task runner, passing "Setup Script" as started_by.
    TaskRunner.run_task(CheckCovidRelatedTask, started_by="Setup Script")
Exemple #3
0
    def run(self):
        """Fetch new articles from every source, then run post-processing.

        Steps, in order:

        1. Run the medRxiv/bioRxiv, arXiv, Elsevier and Pubmed new-article
           tasks and report progress after each one (10, 20, 40, 60).
        2. If ``settings.UPDATE_VECTORIZER`` is truthy, launch the four
           'search' service tasks one after another with ``block=True``
           (progress 70, 80, 90, 95); otherwise log that the step is skipped.
        3. Run ``AltmetricUpdateTask`` with ``update_all=True`` and
           ``only_new=True``.
        4. Run ``GeoParserTask``.

        Every sub-task receives this task's ``started_by`` value.
        """
        started_by = self._task.started_by

        # (label used in log messages, task class,
        #  progress value reported once the source is done)
        article_sources = (
            ("medRxiv/bioRxiv", MedBiorxivNewArticlesTask, 10),
            ("arXiv", ArxivNewArticlesTask, 20),
            ("Elsevier", ElsevierNewArticlesTask, 40),
            ("Pubmed", PubmedNewArticlesTask, 60),
        )
        for label, task_cls, progress_done in article_sources:
            self.log("Get new {} articles...".format(label))
            TaskRunner.run_task(task_cls, started_by=started_by)
            # Every source now logs its completion (the Elsevier step used to
            # have no such message), and always before progress is reported.
            self.log("Finished getting new {} articles...".format(label))
            self.progress(progress_done)

        if settings.UPDATE_VECTORIZER:
            self.log("Updating Topic assigment...")
            task_launcher = get_task_launcher('search')

            # The same config dict is handed to every launched task.
            task_config = {
                'service': 'search',
                'parameters': [],
                'started_by': started_by
            }

            # (task name, completion log message, progress value);
            # the log message texts are kept verbatim.
            search_steps = (
                ("setup-vectorizer",
                 "Finished setup-vectorizer", 70),
                ("update-category-assignment",
                 "Finished updating category assigment", 80),
                ("nearest-neighbor-topic-assignment",
                 "Finished nearest-neighbor-topic-assignment", 90),
                ("reduce-embedding-dimensionality",
                 "Finished reduce-embedding-dimensionality", 95),
            )
            for task_name, finished_message, progress_done in search_steps:
                task_launcher.launch_task(name=task_name,
                                          config=task_config,
                                          block=True)
                self.log(finished_message)
                self.progress(progress_done)
        else:
            self.log("Paper matrix update and topic assignment skipped.")

        self.log("Update Altmetric data of new papers")
        TaskRunner.run_task(AltmetricUpdateTask,
                            started_by=started_by,
                            update_all=True,
                            only_new=True)
        self.log("Finished updating Altmetric data of new papers")

        self.log("Extract locations from papers...")
        TaskRunner.run_task(GeoParserTask, started_by=started_by)
        self.log("Finished extracting locations from papers")