def main(src_info, evitar_iso, verbose, desconectado, procesar_articles,
         include_windows, tarball):
    """Run the whole CDPedia build pipeline.

    The steps run in a fixed order: prepare the temporary area, copy the
    assets, (optionally) preprocess the articles and extract the images log,
    recalculate reductions, (optionally) download images, reduce and pack
    them, (optionally) build the index and article blocks, copy sources and
    indexes, and finally build the ISO and/or the tarball.

    Parameters:
        src_info: directory with the source data; its "articles"
            subdirectory is what gets preprocessed and indexed.
        evitar_iso: when true, the ISO is not built.
        verbose: forwarded to the individual processing steps.
        desconectado: when true, the image download step is skipped.
        procesar_articles: when false, preprocessing, the images log, the
            index and the article blocks are all skipped.
        include_windows: when true, the Windows autorun files are copied.
        tarball: name handed to build_tarball(); a false value skips it.

    Raises:
        SystemExit: with status 1 when articles must be processed but the
            articles directory does not exist.
    """
    if procesar_articles:
        try:
            import SuffixTree  # NOQA (only probing that it is installed)
        except ImportError:
            print(NO_ST_MSG)

    articulos = path.join(src_info, "articles")

    mensaje("Comenzando!")
    preparaTemporal(procesar_articles)

    mensaje("Copiando los assets")
    copiarAssets(src_info, config.DIR_ASSETS)

    if procesar_articles:
        mensaje("Preprocesando")
        if not path.exists(articulos):
            print("\nERROR: No se encuentra el directorio %r" % articulos)
            print("Este directorio es obligatorio para el procesamiento general")
            # this is a failure: report it through a non-zero exit status
            sys.exit(1)
        cantnew, cantold = preprocesar.run(articulos, verbose)
        print(' total %d páginas procesadas' % cantnew)
        print(' y %d que ya estaban de antes' % cantold)

        mensaje("Calculando los que quedan y los que no")
        preprocesar.calcula_top_htmls()

        mensaje("Generando el log de imágenes")
        taken, adesc = extraer.run(verbose)
        print(' total: %5d imágenes extraídas' % taken)
        print(' %5d a descargar' % adesc)
    else:
        mensaje("Evitamos procesar artículos y generar el log de imágenes")

    mensaje("Recalculando porcentajes de reducción")
    calcular.run(verbose)

    if not desconectado:
        mensaje("Descargando las imágenes de la red")
        download.traer(verbose)

    mensaje("Reduciendo las imágenes descargadas")
    reducir.run(verbose)

    mensaje("Emblocando las imágenes reducidas")
    # group the images into blocks
    result = ImageManager.generar_bloques(verbose)
    print(' total: %d bloques con %d imags' % result)

    if procesar_articles:
        mensaje("Generando el índice")
        result = cdpindex.generar_de_html(articulos, verbose)
        print(' total: %d archivos' % result)

        mensaje("Generando los bloques de artículos")
        result = ArticleManager.generar_bloques(verbose)
        print(' total: %d bloques con %d archivos y %d redirects' % result)
    else:
        mensaje("Evitamos generar el índice y los bloques")

    mensaje("Copiando las fuentes")
    copiarSources()

    mensaje("Copiando los indices")
    dest_src = path.join(config.DIR_CDBASE, "cdpedia", "indice")
    # copytree needs the destination to be absent, so drop any previous copy
    if os.path.exists(dest_src):
        shutil.rmtree(dest_src)
    shutil.copytree(config.DIR_INDICE, dest_src)

    if include_windows:
        mensaje("Copiando cosas para Windows")
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    mensaje("Generamos la config para runtime")
    genera_run_config()

    if not evitar_iso:
        mensaje("Armamos el ISO")
        armarIso("cdpedia.iso")

    if tarball:
        mensaje("Armamos el tarball con %r" % (tarball,))
        build_tarball(tarball)

    mensaje("Todo terminado!")
def main(lang, src_info, version, lang_config, gendate,
         verbose=False, desconectado=False, procesar_articles=True):
    """Build a CDPedia image (ISO or tarball) for a language and version.

    Parameters:
        lang: key into config.imagtypes; also forwarded to the article
            blocks generator and used in the output file name.
        src_info: directory with the source data; its "articles"
            subdirectory is what gets preprocessed and indexed.
        version: key into config.imagtypes[lang]; the selected entry is
            stored as config.imageconf.
        lang_config: stored as config.langconf.
        gendate: included in the output file name.
        verbose: forwarded to the image and article processing steps.
        desconectado: when true, the image download step is skipped.
        procesar_articles: when false, preprocessing, the images log, the
            index and the article blocks are all skipped.

    Raises:
        SystemExit: with status 1 when lang or version are not configured.
        EnvironmentError: when articles must be processed but the articles
            directory does not exist.
        ValueError: when config.imageconf["type"] is neither "iso" nor
            "tarball".
    """
    # don't affect the rest of the machine
    make_it_nicer()

    if procesar_articles:
        try:
            import SuffixTree  # NOQA
        except ImportError:
            logger.warning(NO_ST_MSG)

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        print("Not a valid language! try one of %s" % (config.imagtypes.keys(),))
        # sys.exit (not the interactive-only exit()) with a failure status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        print("Not a valid version! try one of %s" % (_lang_conf.keys(),))
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    preparaTemporal(procesar_articles)

    logger.info("Copying the assets and locale files")
    copy_assets(src_info, config.DIR_ASSETS)
    shutil.copytree('locale', path.join(config.DIR_CDBASE, "locale"))

    articulos = path.join(src_info, "articles")
    if procesar_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        cantnew, cantold = preprocesar.run(articulos)
        logger.info("Processed pages: %d new, %d from before",
                    cantnew, cantold)

        logger.info("Calculating which stay and which don't")
        preprocesar.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calcular.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve()

    logger.info("Reducing the downloaded images")
    reducir.run(verbose)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not procesar_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocesar.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generar_de_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(
            lang, verbose)
        logger.info("Got %d blocks with %d files and %d redirects",
                    q_blocks, q_files, q_redirs)

    logger.info("Copying the sources")
    copy_sources()

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_BLOQUES, "bloques"),  # blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, "cdpedia", link_name)
        # lexists (not exists) so that a dangling link is also replaced;
        # exists() reports False for it and os.symlink would then fail
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        # generated by pyinstaller 2.0
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    logger.info("Generating runtime config")
    genera_run_config()

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (
        lang, config.VERSION, gendate, version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")
""" parser = optparse.OptionParser() parser.set_usage(msg) parser.add_option("-v", "--verbose", action="store_true", dest="verbose", help="muestra info de lo que va haciendo") parser.add_option("-i", "--image", action="store_true", dest="image", help=u"busca en imagenes (busca artículos por default)") (options, args) = parser.parse_args() if len(args) != 1: parser.print_help() exit() nom_item = args[0].decode("utf8") verbose = bool(options.verbose) if options.image: manager = ImageManager(verbose=verbose) else: manager = ArticleManager(verbose=verbose) main(manager, nom_item, verbose)
def main(lang, src_info, version, lang_config, gendate,
         verbose=False, desconectado=False, procesar_articles=True):
    """Build a CDPedia image (ISO or tarball) for a language and version.

    Parameters:
        lang: key into config.imagtypes; also forwarded to the article
            blocks generator and used in the output file name.
        src_info: directory with the source data; its "articles"
            subdirectory is what gets preprocessed and indexed.
        version: key into config.imagtypes[lang]; the selected entry is
            stored as config.imageconf.
        lang_config: stored as config.langconf.
        gendate: included in the output file name.
        verbose: forwarded to the image and article processing steps.
        desconectado: when true, the image download step is skipped.
        procesar_articles: when false, preprocessing, the images log, the
            index and the article blocks are all skipped.

    Raises:
        SystemExit: with status 1 when lang or version are not configured.
        EnvironmentError: when articles must be processed but the articles
            directory does not exist.
        ValueError: when config.imageconf["type"] is neither "iso" nor
            "tarball".
    """
    # don't affect the rest of the machine
    make_it_nicer()

    if procesar_articles:
        try:
            import SuffixTree  # NOQA
        except ImportError:
            logger.warning(
                "Import error on SuffixTree; compressed index generation will be REALLY slow. "
                "Please install it (download, python2 setup.py build, python2 setup.py install) "
                "from here: http://taniquetil.com.ar/facundo/SuffixTree-0.7.1-8bit.tar.bz2"
            )

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        print("ERROR: %r is not a valid language! try one of %s" % (lang, config.imagtypes.keys()))
        # sys.exit (not the interactive-only exit()) with a failure status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        print("ERROR: %r is not a valid version! try one of %s" % (version, _lang_conf.keys()))
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    preparaTemporal(procesar_articles)

    logger.info("Copying the assets and locale files")
    copy_assets(src_info, config.DIR_ASSETS)
    shutil.copytree('locale', path.join(config.DIR_CDBASE, "locale"))

    articulos = path.join(src_info, "articles")
    if procesar_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        preprocesar.run(articulos)

        logger.info("Calculating which stay and which don't")
        preprocesar.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calcular.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve()

    logger.info("Reducing the downloaded images")
    reducir.run(verbose)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not procesar_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocesar.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generar_de_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(
            lang, verbose)
        logger.info("Got %d blocks with %d files and %d redirects",
                    q_blocks, q_files, q_redirs)

    logger.info("Copying the sources")
    copy_sources()

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_BLOQUES, "bloques"),  # blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, "cdpedia", link_name)
        # lexists (not exists) so that a dangling link is also replaced;
        # exists() reports False for it and os.symlink would then fail
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        # generated by pyinstaller 2.0
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    logger.info("Generating runtime config")
    genera_run_config()

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (
        lang, config.VERSION, gendate, version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")
def main(lang, src_info, version, lang_config, gendate, images_dump_dir,
         verbose=False, desconectado=False, process_articles=True):
    """Generate the CDPedia tarball or iso.

    Parameters:
        lang: key into config.imagtypes; also stored as config.LANGUAGE when
            that is still unset, forwarded to the article blocks generator
            and to the python docs step, and used in the output file name.
        src_info: directory with the source data ("articles" subdirectory,
            'portal_pages.txt', assets and stylesheet sources).
        version: key into config.imagtypes[lang]; the selected entry is
            stored as config.imageconf.
        lang_config: stored as config.langconf; its 'second_language' entry
            is handed to set_locale() and the whole object to
            gen_run_config() and pydocs.clone().
        gendate: included in the output file name.
        images_dump_dir: forwarded to the image download, scale and embed
            steps.
        verbose: forwarded to the image and article processing steps.
        desconectado: when true, the image download step is skipped.
        process_articles: when false, preprocessing, the images log, the
            index and the article blocks are all skipped.

    Raises:
        SystemExit: with status 1 when lang or version are not configured.
        EnvironmentError: when articles must be processed but the articles
            directory does not exist.
        ValueError: when config.imageconf["type"] is neither "iso" nor
            "tarball".
    """
    # don't affect the rest of the machine
    make_it_nicer()

    # set language in config
    if config.LANGUAGE is None:
        config.LANGUAGE = lang
        config.URL_WIKIPEDIA = config.URL_WIKIPEDIA_TPL.format(lang=lang)

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        available_langs = list(config.imagtypes.keys())
        logger.error("%r is not a valid language! try one of %s",
                     lang, available_langs)
        # sys.exit (not the interactive-only exit()) with a failure status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        available_versions = list(_lang_conf.keys())
        logger.error("%r is not a valid version! try one of %s",
                     version, available_versions)
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    prepare_temporary_dirs(process_articles)

    logger.info("Copying the assets and locale files")
    dst_assets = os.path.join(config.DIR_CDBASE, 'assets')
    copy_assets(src_info, dst_assets)
    link(os.path.join(src_info, 'portal_pages.txt'), config.DIR_TEMP)
    copy_dir('locale', path.join(config.DIR_CDBASE, "locale"))
    set_locale(lang_config.get('second_language'), record=True)

    logger.info("Copying '%s' stylesheet and associated media resources",
                config.CSS_FILENAME)
    copy_css(src_info, dst_assets)

    articulos = path.join(src_info, "articles")
    if process_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        preprocess.run(articulos)

        logger.info("Calculating which stay and which don't")
        preprocess.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calculate.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve(images_dump_dir)

    logger.info("Reducing the downloaded images")
    scale.run(verbose, images_dump_dir)

    if config.EMBED_IMAGES:
        logger.info("Embedding selected images")
        embed.run(images_dump_dir)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not process_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocess.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generate_from_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(
            lang, verbose)
        logger.info("Got %d blocks with %d files and %d redirects",
                    q_blocks, q_files, q_redirs)

    logger.info("Copying the sources and libs")
    copy_sources()
    generate_libs()

    # Copy python docs
    pydocs.clone(lang, lang_config, os.path.dirname(src_info))

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_PAGES_BLOCKS, "pages"),  # pages blocks
        (config.DIR_IMAGES_BLOCKS, "images"),  # images blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, link_name)
        # lexists (not exists) so that a dangling link is also replaced;
        # exists() reports False for it and os.symlink would then fail
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)
        # unpack embeddable python distribution for win32
        py_win_zip = "resources/autorun.win/python-win32.zip"
        py_win_dst = os.path.join(config.DIR_CDBASE, 'python')
        with zipfile.ZipFile(py_win_zip, 'r') as zh:
            zh.extractall(py_win_dst)

    logger.info("Generating runtime config")
    gen_run_config(lang_config)

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (
        lang, config.VERSION, gendate, version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")