Exemple #1
0
 def prepare(self):
     """
     Authenticate through the Mendeley client credentials flow.

     On success the authenticated session is stored in ``self._session`` and
     ``self._initialized`` is set to True. Any exception raised on the way is
     logged as critical and swallowed, so ``self._initialized`` is not set
     by this call in that case.
     """
     try:
         credentials_flow = self._mendeley.start_client_credentials_flow()
         authenticated_session = credentials_flow.authenticate()
         self._session = authenticated_session
         self._initialized = True
     except Exception as error:
         # Best effort: a failure is only reported, never propagated.
         log.critical(error)
Exemple #2
0
    def get_profile_by_id(self, profile_id: str) -> Profile:
        """
        Fetch one profile through the authenticated session.

        :param profile_id: id passed to ``self._session.profiles.get``
        :return: result of ``get_profile_from_json`` applied to the ``json``
                 attribute of the fetched profile.
                 NOTE: if the crawler is not initialized, a critical message
                 is logged and an empty list is returned instead, despite
                 the ``Profile`` annotation.
        """
        if self._initialized:
            fetched = self._session.profiles.get(profile_id)
            return get_profile_from_json(fetched.json)

        # Not initialized: report it and hand back the empty fallback value.
        log.critical("get_profile_by_id has been fired but the SDKCrawler was not initialized")
        return []
Exemple #3
0
    def get_profile_by_id(self, profile_id: str) -> Profile:
        """
        Look up a single profile via ``self._session.profiles``.

        :param profile_id: id of the profile to fetch
        :return: the fetched profile's ``json`` converted by
                 ``get_profile_from_json``. When ``self._initialized`` is
                 falsy nothing is fetched: a critical message is logged and
                 an empty list is returned (note the mismatch with the
                 ``Profile`` annotation).
        """
        ready = self._initialized
        if not ready:
            message = "get_profile_by_id has been fired but the SDKCrawler was not initialized"
            log.critical(message)
            return []

        sdk_profile = self._session.profiles.get(profile_id)
        profile_json = sdk_profile.json
        return get_profile_from_json(profile_json)
Exemple #4
0
    def get_documents_by_group_id(self, group_id: str) -> [Document]:
        """
        Collect all documents of a group through the authenticated session.

        :param group_id: id passed to ``self._session.group_documents``
        :return: list with one ``get_document_from_json`` result per document
                 yielded by the ``view='all'`` iteration; an empty list (after
                 logging a critical message) if the crawler is not initialized
        """
        if not self._initialized:
            log.critical("get_documents_by_group_id has been fired but the SDKCrawler was not initialized")
            return []

        group_documents = self._session.group_documents(group_id).iter(view='all')
        return [get_document_from_json(entry.json) for entry in group_documents]
Exemple #5
0
    def get_group_members(self, group_id: str) -> [Member]:
        """
        Collect the members of a group, leaving out plain followers.

        :param group_id: id passed to ``self._session.group_members``
        :return: list of ``get_member_from_json`` results whose ``role`` is
                 not ``'follower'``; an empty list (after logging a critical
                 message) if the crawler is not initialized
        """
        if not self._initialized:
            log.critical("get_group_members has been fired but the SDKCrawler was not initialized")
            return []

        sdk_members = self._session.group_members(group_id).iter()
        # Lazy parsing keeps the parse-then-filter order of a plain loop.
        parsed_members = (get_member_from_json(entry.member_json) for entry in sdk_members)
        return [parsed for parsed in parsed_members if parsed.role != 'follower']
Exemple #6
0
    def get_documents_by_group_id(self, group_id: str) -> [Document]:
        """
        Return the documents of the given group.

        Documents are read from ``self._session.group_documents(group_id)``
        with ``view='all'`` and each one's ``json`` is converted by
        ``get_document_from_json``.

        :param group_id: id of the group whose documents are requested
        :return: list of converted documents, or an empty list when
                 ``self._initialized`` is falsy (a critical message is
                 logged in that case)
        """
        if self._initialized:
            document_iterator = self._session.group_documents(group_id).iter(view='all')
            return [get_document_from_json(item.json) for item in document_iterator]

        log.critical(
            "get_documents_by_group_id has been fired but the SDKCrawler was not initialized"
        )
        return []
Exemple #7
0
    def get_group_members(self, group_id: str) -> [Member]:
        """
        Return the non-follower members of the given group.

        Members are read from ``self._session.group_members(group_id)`` and
        each one's ``member_json`` is converted by ``get_member_from_json``.

        :param group_id: id of the group whose members are requested
        :return: converted members whose ``role`` differs from ``'follower'``,
                 or an empty list when ``self._initialized`` is falsy (a
                 critical message is logged in that case)
        """
        if self._initialized:
            member_iterator = self._session.group_members(group_id).iter()
            converted = (get_member_from_json(item.member_json) for item in member_iterator)
            return [candidate for candidate in converted if candidate.role != 'follower']

        log.critical(
            "get_group_members has been fired but the SDKCrawler was not initialized"
        )
        return []
Exemple #8
0
    def get_documents_by_profile_id(self, profile_id: str) -> [Document]:
        """
        Collect the documents of a profile via ``ExtendedDocuments``.

        :param profile_id: forwarded as ``profile_id`` to the document query
        :return: list with one ``get_document_from_json`` result per document
                 yielded by the query (``view='all'``, ``authored='true'``);
                 an empty list (after logging a critical message) if the
                 crawler is not initialized
        """
        if not self._initialized:
            log.critical("get_documents_by_profile_id has been fired but the SDKCrawler was not initialized")
            return []

        # The official Mendeley SDK has no support for document queries by
        # non-logged-in profile ids. As a workaround the session object is
        # reused to authenticate a custom call.
        # Critical SDK class:
        # https://github.com/Mendeley/mendeley-python-sdk/blob/master/mendeley/resources/documents.py
        profile_documents = ExtendedDocuments(self._session).iter(
            view='all', profile_id=profile_id, authored='true'
        )
        return [get_document_from_json(entry.json) for entry in profile_documents]
Exemple #9
0
    def get_documents_by_profile_id(self, profile_id: str) -> [Document]:
        """
        Return the documents found for the given profile id.

        The query goes through ``ExtendedDocuments(self._session)`` with
        ``view='all'`` and ``authored='true'``; each result's ``json`` is
        converted by ``get_document_from_json``.

        :param profile_id: id of the profile whose documents are requested
        :return: list of converted documents, or an empty list when
                 ``self._initialized`` is falsy (a critical message is
                 logged in that case)
        """
        if self._initialized:
            # The official Mendeley SDK cannot query documents by
            # non-logged-in profile ids, so the existing session is reused
            # to authenticate a custom call instead.
            # Critical SDK class:
            # https://github.com/Mendeley/mendeley-python-sdk/blob/master/mendeley/resources/documents.py
            query = ExtendedDocuments(self._session)
            found = query.iter(view='all', profile_id=profile_id, authored='true')
            return [get_document_from_json(item.json) for item in found]

        log.critical(
            "get_documents_by_profile_id has been fired but the SDKCrawler was not initialized"
        )
        return []
Exemple #10
0
        configuration.load()
        log.info("Configuration has been loaded")

        # Create data controller and assert schema
        # That will remove the race conditions of the gunicorn worker if it's done on every startup
        data_controller = DataController(configuration.database)
        data_controller.assert_schema()

    # Pipeline runner
    elif command == "pipeline":
        config = ServiceConfiguration()
        config.load()

        data_controller = DataController(config.database)
        if not data_controller.is_initialized():
            log.critical("Database is not initialized")
            exit()

        crawler = None
        if not config.uses_mendeley:
            log.info("Pipeline uses FileCrawler")
            crawler = FileCrawler()
        else:
            from mendeleycache.crawler.sdk_crawler import SDKCrawler
            log.info("Pipeline uses SDKCrawler".format(
                app_id=config.crawler.app_id,
                app_secret=config.crawler.app_secret))
            crawler = SDKCrawler(app_id=config.crawler.app_id,
                                 app_secret=config.crawler.app_secret)

        crawl_controller = CrawlController(crawler,
Exemple #11
0
        configuration.load()
        log.info("Configuration has been loaded")

        # Create data controller and assert schema
        # That will remove the race conditions of the gunicorn worker if it's done on every startup
        data_controller = DataController(configuration.database)
        data_controller.assert_schema()

    # Pipeline runner
    elif command == "pipeline":
        config = ServiceConfiguration()
        config.load()

        data_controller = DataController(config.database)
        if not data_controller.is_initialized():
            log.critical("Database is not initialized")
            exit()

        crawler = None
        if not config.uses_mendeley:
            log.info("Pipeline uses FileCrawler")
            crawler = FileCrawler()
        else:
            from mendeleycache.crawler.sdk_crawler import SDKCrawler
            log.info("Pipeline uses SDKCrawler".format(
                app_id=config.crawler.app_id,
                app_secret=config.crawler.app_secret
            ))
            crawler = SDKCrawler(
                app_id=config.crawler.app_id,
                app_secret=config.crawler.app_secret
Exemple #12
0
 def prepare(self):
     """
     Set up the crawler's session using the client credentials flow.

     Starts the flow on ``self._mendeley``, authenticates, stores the result
     in ``self._session`` and marks the crawler as initialized. Exceptions
     are logged as critical and not re-raised.
     """
     try:
         flow = self._mendeley.start_client_credentials_flow()
         self._session = flow.authenticate()
         self._initialized = True
     except Exception as failure:
         # Failures are reported only; callers rely on the _initialized flag.
         log.critical(failure)