def build_tree(tree, tree_indexers, verbose): """Set up env vars, and run the build command.""" if not tree.build_command: return # Set up build environment variables: environ = os.environ.copy() for ti in tree_indexers: environ.update(ti.environment(environ)) # Call make or whatever: with open_log(tree.log_folder, 'build.log', verbose) as log: print 'Building tree' workers = max(tree.config.workers, 1) r = subprocess.call( tree.build_command.replace('$jobs', str(workers)) .format(workers=workers), shell = True, stdout = log, stderr = log, env = environ, cwd = tree.object_folder ) # Abort if build failed: if r != 0: print >> sys.stderr, ("Build command for '%s' failed, exited non-zero." % tree.name) if not verbose: print >> sys.stderr, 'Log follows:' with open(log.name) as log_file: print >> sys.stderr, ' | %s ' % ' | '.join(log_file) raise BuildError
def build_tree(tree, tree_indexers, verbose): """Set up env vars, and run the build command.""" if not tree.build_command: return # Set up build environment variables: environ = os.environ.copy() for ti in tree_indexers: environ.update(ti.environment(environ)) # Call make or whatever: with open_log(tree.log_folder, 'build.log', verbose) as log: print 'Building tree' workers = max(tree.workers, 1) r = subprocess.call(tree.build_command.replace( '$jobs', str(workers)).format(workers=workers), shell=True, stdout=log, stderr=log, env=environ, cwd=tree.object_folder) # Abort if build failed: if r != 0: print >> sys.stderr, ( "Build command for '%s' failed, exited non-zero." % tree.name) if not verbose: print >> sys.stderr, 'Log follows:' with open(log.name) as log_file: print >> sys.stderr, ' | %s ' % ' | '.join(log_file) raise BuildError
def index_chunk(tree, tree_indexers, paths, index, swallow_exc=False,
                worker_number=None):
    """Index a pile of files.

    This is the entrypoint for indexer pool workers.

    :arg paths: The paths of the files to index; each is handed to
        index_file() in turn
    :arg swallow_exc: If true, return a description of any exception instead
        of letting it propagate
    :arg worker_number: A unique number assigned to this worker so it knows
        what to call its log file. If falsy, nothing is logged.

    Return None on success. If something raises and ``swallow_exc`` is true,
    return a tuple of (formatted traceback, exception type, exception value,
    path), where path is the file being indexed at the time or
    '(no file yet)' if indexing hadn't started.

    """
    path = '(no file yet)'
    try:
        # So we can use Flask's url_from():
        with make_app(tree.config).test_request_context():
            es = current_app.es
            # Don't log if single-process. The log is opened *before* the
            # try block: were open_log() to fail inside it, the finally
            # clause would trip over an unbound ``log`` and mask the real
            # error.
            log = (open_log(tree.log_folder,
                            'index-chunk-%s.log' % worker_number)
                   if worker_number else None)
            try:
                for path in paths:
                    if log:
                        log.write('Starting %s.\n' % path)
                    index_file(tree, tree_indexers, path, es, index)
                if log:
                    log.write('Finished chunk.\n')
            finally:
                if log:
                    log.close()
    except Exception:
        if not swallow_exc:
            raise
        # Take only the type and value; don't keep the traceback in a local.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, path
def _build_html_for_file_ids(tree, start, end): """Write HTML files for file IDs from ``start`` to ``end``. Return None if all goes well, a tuple of (stringified exception, exc type, exc value, file ID, file path) if something goes wrong while htmlifying a file. This is the top-level function of an HTML worker process. Log progress to a file named "build-html-<start>-<end>.log". """ path = '(no file yet)' id = -1 try: # We might as well have this write its log directly rather than returning # them to the master process, since it's already writing the built HTML # directly, since that probably yields better parallelism. conn = connect_db(tree.target_folder) # TODO: Replace this ad hoc logging with the logging module (or something # more humane) so we can get some automatic timestamps. If we get # timestamps spit out in the parent process, we don't need any of the # timing or counting code here. with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log: # Load htmlifier plugins: plugins = load_htmlifiers(tree) for plugin in plugins: plugin.load(tree, conn) start_time = datetime.now() # Fetch and htmlify each document: for num_files, (id, path, icon, text) in enumerate( conn.execute(""" SELECT files.id, path, icon, trg_index.text FROM trg_index, files WHERE trg_index.id = files.id AND trg_index.id >= ? AND trg_index.id <= ? """, [start, end]), 1): dst_path = os.path.join(tree.target_folder, path + '.html') log.write('Starting %s.\n' % path) htmlify(tree, conn, icon, path, text, dst_path, plugins) conn.commit() conn.close() # Write time information: time = datetime.now() - start_time log.write('Finished %s files in %s.\n' % (num_files, time)) except Exception as exc: type, value, traceback = exc_info() return format_exc(), type, value, id, path
def _build_html_for_file_ids(tree, start, end): """Write HTML files for file IDs from ``start`` to ``end``. Return None if all goes well, a tuple of (stringified exception, exc type, exc value, file ID, file path) if something goes wrong while htmlifying a file. This is the top-level function of an HTML worker process. Log progress to a file named "build-html-<start>-<end>.log". """ path = '(no file yet)' id = -1 try: # We might as well have this write its log directly rather than returning # them to the master process, since it's already writing the built HTML # directly, since that probably yields better parallelism. conn = connect_db(tree.target_folder) # TODO: Replace this ad hoc logging with the logging module (or something # more humane) so we can get some automatic timestamps. If we get # timestamps spit out in the parent process, we don't need any of the # timing or counting code here. with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log: # Load htmlifier plugins: plugins = load_htmlifiers(tree) for plugin in plugins: plugin.load(tree, conn) start_time = datetime.now() # Fetch and htmlify each document: for num_files, (id, path, icon, text) in enumerate( conn.execute( """ SELECT files.id, path, icon, trg_index.text FROM trg_index, files WHERE trg_index.id = files.id AND trg_index.id >= ? AND trg_index.id <= ? """, [start, end]), 1): dst_path = os.path.join(tree.target_folder, path + '.html') log.write('Starting %s.\n' % path) htmlify(tree, conn, icon, path, text, dst_path, plugins) conn.commit() conn.close() # Write time information: time = datetime.now() - start_time log.write('Finished %s files in %s.\n' % (num_files, time)) except Exception as exc: type, value, traceback = exc_info() return format_exc(), type, value, id, path
def build_tree(tree, conn, verbose): """Build the tree, pre_process, build and post_process.""" # Load indexers indexers = load_indexers(tree) # Get system environment variables environ = {} for key, val in os.environ.items(): environ[key] = val # Let plugins preprocess # modify environ, change makefile, hack things whatever! for indexer in indexers: indexer.pre_process(tree, environ) # Add source and build directories to the command environ["source_folder"] = tree.source_folder environ["build_folder"] = tree.object_folder # Open log file with open_log(tree, 'build.log', verbose) as log: # Call the make command print "Building the '%s' tree" % tree.name r = subprocess.call( tree.build_command.replace('$jobs', tree.config.nb_jobs), shell = True, stdout = log, stderr = log, env = environ, cwd = tree.object_folder ) # Abort if build failed! if r != 0: print >> sys.stderr, ("Build command for '%s' failed, exited non-zero." % tree.name) if not verbose: print >> sys.stderr, 'Log follows:' with open(log.name) as log_file: print >> sys.stderr, ' | %s ' % ' | '.join(log_file) sys.exit(1) # Let plugins post process for indexer in indexers: indexer.post_process(tree, conn)
def index_chunk(tree, tree_indexers, paths, index, swallow_exc=False,
                worker_number=None):
    """Index a pile of files.

    This is the entrypoint for indexer pool workers.

    :arg paths: The paths of the files to index; each is handed to
        index_file() in turn
    :arg swallow_exc: If true, return a description of any exception instead
        of letting it propagate
    :arg worker_number: A unique number assigned to this worker so it knows
        what to call its log file. If falsy, nothing is logged.

    Return None on success. If something raises and ``swallow_exc`` is true,
    return a tuple of (formatted traceback, exception type, exception value,
    path), where path is the file being indexed at the time or
    '(no file yet)' if indexing hadn't started.

    """
    path = '(no file yet)'
    try:
        # So we can use Flask's url_from():
        with make_app(tree.config).test_request_context():
            es = current_app.es
            # Don't log if single-process. Opening the log ahead of the try
            # block means a failing open_log() surfaces as itself, rather
            # than as an unbound ``log`` blowing up in the finally clause.
            if worker_number:
                log = open_log(tree.log_folder,
                               'index-chunk-%s.log' % worker_number)
            else:
                log = None
            try:
                for path in paths:
                    if log:
                        # Paths are written to the log as UTF-8 bytes.
                        log.write('Starting %s.\n' % path.encode('utf-8'))
                    index_file(tree, tree_indexers, path, es, index)
                if log:
                    log.write('Finished chunk.\n')
            finally:
                if log:
                    log.close()
    except Exception:
        if not swallow_exc:
            raise
        # Take only the type and value; don't keep the traceback in a local.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, path