Example #1
0
File: base.py Project: 4iji/crawley
    def syncdb(self, syncb_command):
        """
            Checks for storages configuration in the settings.py file

            Resets [self.connector] and [syncb_command.sessions], then
            sets up and registers every document session whose setting
            passes [has_valid_attr] on [syncb_command.settings].

            When DATABASE_ENGINE passes the same check, the "models"
            user module is imported, the database session is registered
            and [self.connector] is built from the [connectors] entry
            named by that setting.
        """

        self.connector = None
        syncb_command.sessions = []

        # Maps the settings attribute that enables a storage to the
        # session object in charge of that storage
        documents_sessions = { 'JSON_DOCUMENT' : json_session,
                               'XML_DOCUMENT' : xml_session,
                               'CSV_DOCUMENT' : csv_session,
                               'MONGO_DB_HOST' : mongo_session,
                               'COUCH_DB_HOST' : couch_session,
                             }

        # items() is used instead of iteritems() because the latter
        # only exists on python 2 dictionaries
        for storage_name, session in documents_sessions.items():

            if has_valid_attr(syncb_command.settings, storage_name):

                session.set_up(syncb_command.settings, storage_name)
                syncb_command.sessions.append(session)

        if has_valid_attr(syncb_command.settings, "DATABASE_ENGINE"):

            import_user_module("models", exit=False)
            syncb_command.sessions.append(database_session)
            self.connector = connectors[syncb_command.settings.DATABASE_ENGINE](syncb_command.settings)
Example #2
0
    def syncdb(self, syncb_command):
        """
            Checks for storages configuration in the settings.py file

            Resets [self.connector] and [syncb_command.sessions], then
            sets up and registers every document session whose setting
            passes [has_valid_attr] on [syncb_command.settings].

            When DATABASE_ENGINE passes the same check, the "models"
            user module is imported, the database session is registered
            and [self.connector] is built from the [connectors] entry
            named by that setting.
        """

        self.connector = None
        syncb_command.sessions = []

        # Maps the settings attribute that enables a storage to the
        # session object in charge of that storage
        documents_sessions = {
            'JSON_DOCUMENT': json_session,
            'XML_DOCUMENT': xml_session,
            'CSV_DOCUMENT': csv_session,
            'MONGO_DB_HOST': mongo_session,
            'COUCH_DB_HOST': couch_session,
        }

        # items() is used instead of iteritems() because the latter
        # only exists on python 2 dictionaries
        for storage_name, session in documents_sessions.items():

            if has_valid_attr(syncb_command.settings, storage_name):

                session.set_up(syncb_command.settings, storage_name)
                syncb_command.sessions.append(session)

        if has_valid_attr(syncb_command.settings, "DATABASE_ENGINE"):

            import_user_module("models", exit=False)
            syncb_command.sessions.append(database_session)
            self.connector = connectors[
                syncb_command.settings.DATABASE_ENGINE](syncb_command.settings)
Example #3
0
    def run(self, run_command):
        """
            Run the crawler of a code project

            Builds one spider per class in [user_crawlers], spawns its
            [start] method on a green pool and waits for the pool to
            finish. Every session in [run_command.syncdb.sessions] is
            closed afterwards, even if creating or spawning a spider
            raised an error.
        """

        # The returned modules are not used here, so they are not bound.
        # NOTE(review): presumably these imports are what populate
        # [user_crawlers] - confirm before reordering or removing them
        import_user_module("crawlers")
        import_user_module("models")

        pool = GreenPool()

        try:
            for crawler_class in user_crawlers:

                spider = crawler_class(sessions=run_command.syncdb.sessions, debug=run_command.settings.SHOW_DEBUG_INFO)
                pool.spawn_n(spider.start)

            pool.waitall()

        finally:
            # Release the storages no matter how the crawling ended
            for session in run_command.syncdb.sessions:
                session.close()
0
 def _check_project_type(self):
     """
         Detects the kind of project [dsl templates based or code
         based] and stores the matching project type object in
         [self.project_type]. Calls [exit_with_error] when neither
         kind is recognized.
     """

     template_files = ("config.ini", "template.crw")

     # Checked in order and stopping at the first missing file
     if all(check_for_file(self.settings, file_name) for file_name in template_files):
         self.project_type = TemplateProject()
         return

     # Otherwise the project is code based if the "models" import gives a module
     if import_user_module("models", exit=False) is not None:
         self.project_type = CodeProject()
         return

     exit_with_error("Unrecognized crawley project")
Example #5
0
    def set_up(self, browser_tab, is_new=False):
        """
            Moves into the project's directory and then either runs a
            StartProjectCommand for a new templates project or validates
            and loads an existing one, according to the [is_new] flag.
            The "settings" user module is imported at the end in both
            cases and kept in [self.settings].
        """

        os.chdir(self.dir_name)
        os.sys.path[0] = self.project_name

        if not is_new:
            # Existing project: check it first, then load its data
            self._validate_project()
            self._load_data(browser_tab)

        else:
            StartProjectCommand(project_type=TemplateProject.name, project_name=self.project_name).execute()

        self.settings = import_user_module("settings")
Example #6
0
    def _check_for_settings(self):
        """
            Imports the user's settings module and returns it

            The module is taken from the path given in the [settings]
            command line option or, when the option is missing, from a
            "settings" module in the current working directory. The
            settings' PROJECT_ROOT is appended to sys.path afterwards.
        """

        options, _ = self.parser.parse_args(self.args)

        if options.settings is None:
            search_dir, module_name = os.getcwd(), "settings"

        else:
            search_dir, file_name = os.path.split(options.settings)
            # The module name is the file name without its extension
            module_name = os.path.splitext(file_name)[0]

        # First position, so this directory is searched before the others
        sys.path.insert(0, search_dir)

        user_settings = import_user_module(module_name)
        sys.path.append(user_settings.PROJECT_ROOT)

        return user_settings