def syncdb(self, syncb_command):
    """
        Checks for storages configuration in the settings.py file
        and collects a session for every storage that is configured.

        syncb_command: the command object; its `settings` attribute is
        inspected and its `sessions` attribute is (re)assigned to the
        list of sessions found here.

        Side effects: sets `self.connector` to None, or to the connector
        built for `settings.DATABASE_ENGINE` when that setting is valid.
        A KeyError is raised if the engine has no entry in `connectors`.
    """

    self.connector = None
    syncb_command.sessions = []

    # Maps the settings attribute that enables a document storage
    # to the session object that handles it.
    documents_sessions = {
        'JSON_DOCUMENT': json_session,
        'XML_DOCUMENT': xml_session,
        'CSV_DOCUMENT': csv_session,
        'MONGO_DB_HOST': mongo_session,
        'COUCH_DB_HOST': couch_session,
    }

    # `.items()` exists on both Python 2 and Python 3 dicts, whereas
    # `.iteritems()` is Python 2 only.
    for storage_name, session in documents_sessions.items():
        if has_valid_attr(syncb_command.settings, storage_name):
            session.set_up(syncb_command.settings, storage_name)
            syncb_command.sessions.append(session)

    if has_valid_attr(syncb_command.settings, "DATABASE_ENGINE"):
        # Called for its import side effect only; the result is unused.
        # NOTE(review): presumably this loads the user's model
        # definitions before the connector is created -- confirm.
        import_user_module("models", exit=False)
        syncb_command.sessions.append(database_session)
        self.connector = connectors[syncb_command.settings.DATABASE_ENGINE](syncb_command.settings)
def syncdb(self, syncb_command):
    """
        Checks for storages configuration in the settings.py file

        Every storage whose settings attribute passes `has_valid_attr`
        gets its session set up and appended to `syncb_command.sessions`
        (which is reset to an empty list first). When `DATABASE_ENGINE`
        is valid, the database session is appended as well and
        `self.connector` is built from the `connectors` table; otherwise
        `self.connector` stays None.
    """

    settings = syncb_command.settings

    self.connector = None
    syncb_command.sessions = []

    # settings attribute name -> session object handling that storage
    documents_sessions = {
        'JSON_DOCUMENT': json_session,
        'XML_DOCUMENT': xml_session,
        'CSV_DOCUMENT': csv_session,
        'MONGO_DB_HOST': mongo_session,
        'COUCH_DB_HOST': couch_session,
    }

    # Use `.items()` rather than the Python 2 only `.iteritems()` so the
    # loop runs unchanged on Python 3 as well.
    for storage_name, session in documents_sessions.items():
        if has_valid_attr(settings, storage_name):
            session.set_up(settings, storage_name)
            syncb_command.sessions.append(session)

    if has_valid_attr(settings, "DATABASE_ENGINE"):
        # The return value is not needed; the call is kept for whatever
        # importing the user's "models" module triggers.
        import_user_module("models", exit=False)
        syncb_command.sessions.append(database_session)
        # Raises KeyError when the engine has no entry in `connectors`.
        self.connector = connectors[settings.DATABASE_ENGINE](settings)
def run(self, run_command):
    """
        Run the crawler of a code project

        run_command: the command object; `run_command.syncdb.sessions`
        is handed to every crawler and `run_command.settings` supplies
        the SHOW_DEBUG_INFO flag.

        Each class in `user_crawlers` is instantiated and its `start`
        method is spawned on a GreenPool. Once the pool has finished,
        or if anything raises along the way, every session is closed.
    """

    # Both modules are imported for their side effects only.
    # NOTE(review): presumably importing "crawlers" is what populates
    # `user_crawlers` -- confirm against the crawler base class.
    import_user_module("crawlers")
    import_user_module("models")

    sessions = run_command.syncdb.sessions
    pool = GreenPool()

    try:
        for crawler_class in user_crawlers:
            spider = crawler_class(sessions=sessions, debug=run_command.settings.SHOW_DEBUG_INFO)
            pool.spawn_n(spider.start)

        pool.waitall()
    finally:
        # Close the sessions even when a crawler fails to start or the
        # wait is interrupted, so storage handles are not leaked.
        for session in sessions:
            session.close()
def _check_project_type(self):
    """
        Decide which kind of project this is and store the result
        in `self.project_type`:

        - TemplateProject when both "config.ini" and "template.crw"
          are found through `check_for_file`
        - CodeProject when the user's "models" module can be imported
        - otherwise `exit_with_error` is called
    """

    # The second file is only looked up when the first one was found.
    is_template_based = (check_for_file(self.settings, "config.ini")
                         and check_for_file(self.settings, "template.crw"))

    if is_template_based:
        self.project_type = TemplateProject()
        return

    # exit=False: a missing module yields None instead of aborting here.
    user_models = import_user_module("models", exit=False)

    if user_models is None:
        exit_with_error("Unrecognized crawley project")
        return

    self.project_type = CodeProject()
def set_up(self, browser_tab, is_new=False):
    """
        Prepare a crawley project for use.

        With `is_new` set, a template based project is created through
        StartProjectCommand; otherwise the existing project is validated
        and its data is loaded into `browser_tab`. In both cases the
        user's "settings" module ends up in `self.settings`.
    """

    # Work from the project's directory and replace the first
    # import path entry with the project name.
    os.chdir(self.dir_name)
    os.sys.path[0] = self.project_name

    if not is_new:
        self._validate_project()
        self._load_data(browser_tab)
    else:
        StartProjectCommand(
            project_type=TemplateProject.name,
            project_name=self.project_name,
        ).execute()

    self.settings = import_user_module("settings")
def _check_for_settings(self):
    """
        Locate and import the user's settings module.

        When the parsed options carry a `settings` path, its directory
        and its file name (without extension) are used; otherwise the
        module "settings" in the current working directory is used.
        The chosen directory is put first on `sys.path`, the module is
        imported and its PROJECT_ROOT is appended to `sys.path`.

        Returns the imported settings module.
    """

    # Positional arguments are not needed here.
    options, _ = self.parser.parse_args(self.args)

    if options.settings is None:
        search_dir = os.getcwd()
        module_name = "settings"
    else:
        search_dir, file_name = os.path.split(options.settings)
        module_name = os.path.splitext(file_name)[0]

    sys.path.insert(0, search_dir)

    settings = import_user_module(module_name)

    sys.path.append(settings.PROJECT_ROOT)
    return settings