from philologic.LoadOptions import LoadOptions
from philologic.Loader import Loader, setup_db_dir

# Load the global PhiloLogic configuration file as a module object.
config_file = imp.load_source("philologic4", "/etc/philologic/philologic4.cfg")

# The "C" locale is exceedingly important to get a uniform sort order.
os.environ["LC_ALL"] = "C"
os.environ["PYTHONIOENCODING"] = "utf-8"


if __name__ == '__main__':
    # Read the load settings from the command line.
    load_options = LoadOptions()
    load_options.parse(sys.argv)

    # Hand the database and web application locations to setup_db_dir
    # before any loading starts.
    setup_db_dir(
        load_options["db_destination"],
        load_options["web_app_dir"],
        force_delete=load_options.force_delete,
    )

    # Database load
    loader = Loader(**load_options.values)
    loader.add_files(load_options.files)

    # Metadata comes from the bibliography file when one was supplied,
    # otherwise from the loader's own metadata parsing.
    bibliography = load_options.bibliography
    if not bibliography:
        load_metadata = loader.parse_metadata(load_options.sort_order, header=load_options.header)
    else:
        load_metadata = loader.parse_bibliography_file(bibliography, load_options.sort_order)

    # Run the remaining load steps in their fixed order.
    loader.parse_files(load_options.cores, load_metadata)
    loader.merge_objects()
    loader.analyze()
    loader.setup_sql_load()
    loader.post_processing()
    loader.finish()

    app_url = os.path.join(config_file.url_root, load_options.dbname)
    print("Application viewable at %s\n" % app_url)
####################
## Load the files ##
####################

# Every name passed to Loader here is defined earlier in this script.
l = Loader(
    data_destination,
    load_filters=filters,
    post_filters=post_filters,
    tables=tables,
    xpaths=xpaths,
    metadata_xpaths=metadata_xpaths,
    pseudo_empty_tags=pseudo_empty_tags,
    suppress_tags=suppress_tags,
    token_regex=token_regex,
    default_object_level=default_object_level,
    debug=debug
)

l.add_files(files)
filenames = l.list_files()

## Build one metadata record per file, ordered by filename.
## To use a custom order given on the command line instead, build the list
## from the files variable (defined at the top of this script) rather than
## from filenames, like so:
## load_metadata = [{"filename":f} for f in files]
load_metadata = [
    {"filename": f}
    for f in sorted(filenames)
]

# Run the load steps in their fixed order.
l.parse_files(workers, load_metadata)
l.merge_objects()
l.analyze()
l.setup_sql_load()
l.post_processing()
l.finish(**extra_locals)
if template_dir: os.system("cp -r %s* %s" % (template_dir,db_destination)) os.system("cp %s.htaccess %s" % (template_dir,db_destination)) #################### ## Load the files ## #################### l = Loader(data_destination, Philo_Types, XPaths, Metadata_XPaths, filters, token_regex, non_nesting_tags, self_closing_tags, pseudo_empty_tags, debug=debug) l.add_files(files) filenames = l.list_files() load_metadata = [{"filename":f} for f in sorted(filenames,reverse=True)] l.parse_files(workers,load_metadata) l.merge_objects() l.analyze() l.make_tables(tables, *r_r_obj) l.finish(**extra_locals) print "\nDone indexing." print "Your database is viewable at " + db_url + "\n"
try: os.mkdir(template_destination) except OSError: print "The %s database already exists" % dbname print "Do you want to delete this database? Yes/No" choice = raw_input().lower() if choice.startswith('y'): os.system('rm -rf %s' % template_destination) os.mkdir(template_destination) else: sys.exit() os.system("cp -r %s* %s" % (install_dir, template_destination)) os.system("cp %s.htaccess %s" % (install_dir, template_destination)) print "copied templates to %s" % template_destination #################### ## Load the files ## #################### l = Loader(workers, filters=filters, tables=tables, clean=False) l.setup_dir(data_destination, files) l.parse_files(XPaths, Metadata_XPaths, token_regex, non_nesting_tags, self_closing_tags, pseudo_empty_tags) l.merge_objects() l.analyze() l.make_tables() l.finish(Philo_Types, Metadata_XPaths, db_url=db_url) print >> sys.stderr, "done indexing." print >> sys.stderr, "db viewable at " + db_url + "/dispatcher.py/form"
if __name__ == '__main__':
    # Read the load settings from the command line.
    load_options = LoadOptions()
    load_options.parse(sys.argv)

    # Hand the database and web application locations to setup_db_dir
    # before any loading starts.
    setup_db_dir(
        load_options["db_destination"],
        load_options["web_app_dir"],
        force_delete=load_options.force_delete,
    )

    # Database load
    loader = Loader(**load_options.values)
    loader.add_files(load_options.files)

    # Metadata comes from the bibliography file when one was supplied,
    # otherwise from the loader's own metadata parsing.
    bibliography = load_options.bibliography
    if not bibliography:
        load_metadata = loader.parse_metadata(load_options.sort_order, header=load_options.header)
    else:
        load_metadata = loader.parse_bibliography_file(bibliography, load_options.sort_order)

    # Run the remaining load steps in their fixed order.
    loader.parse_files(load_options.cores, load_metadata)
    loader.merge_objects()
    loader.analyze()
    loader.setup_sql_load()
    loader.post_processing()
    loader.finish()

    # Report any files the loader recorded in deleted_files.
    skipped_files = loader.deleted_files
    if skipped_files:
        skipped_listing = "\n".join(skipped_files)
        print(
            "The following files where not loaded due to invalid data in the header:\n{}"
            .format(skipped_listing))

    app_url = os.path.join(config_file.url_root, load_options.dbname)
    print("Application viewable at %s\n" % app_url)
try: os.mkdir(template_destination) except OSError: print "The %s database already exists" % dbname print "Do you want to delete this database? Yes/No" choice = raw_input().lower() if choice.startswith('y'): os.system('rm -rf %s' % template_destination) os.mkdir(template_destination) else: sys.exit() os.system("cp -r %s* %s" % (install_dir,template_destination)) os.system("cp %s.htaccess %s" % (install_dir,template_destination)) print "copied templates to %s" % template_destination #################### ## Load the files ## #################### l = Loader(workers, filters=filters, tables=tables, clean=True) l.setup_dir(data_destination,files) l.parse_files(XPaths,Metadata_XPaths,token_regex,non_nesting_tags,self_closing_tags,pseudo_empty_tags) l.merge_objects() l.analyze() l.make_tables() l.finish(Philo_Types, Metadata_XPaths,db_url=db_url) print >> sys.stderr, "done indexing." print >> sys.stderr, "db viewable at " + db_url