Ejemplo n.º 1
0
####################
## Load the files ##
####################

# Gather the Loader's keyword options in one place, then pass them in a single
# call together with the data directory.
loader_options = {
    "load_filters": filters,
    "post_filters": post_filters,
    "tables": tables,
    "xpaths": xpaths,
    "metadata_xpaths": metadata_xpaths,
    "pseudo_empty_tags": pseudo_empty_tags,
    "suppress_tags": suppress_tags,
    "token_regex": token_regex,
    "default_object_level": default_object_level,
    "debug": debug,
}
l = Loader(data_destination, **loader_options)

l.add_files(files)
filenames = l.list_files()
## Build one metadata record per file, ordered by filename.
## To load the files in a custom order supplied on the command line, iterate
## over the files variable (defined at the top of this script) instead of the
## sorted filenames, like so:
## load_metadata = [{"filename":f} for f in files]
ordered_filenames = sorted(filenames)
load_metadata = [{"filename": fname} for fname in ordered_filenames]

# Run the load stages in this fixed order.
l.parse_files(workers, load_metadata)
l.merge_objects()
l.analyze()
l.setup_sql_load()
l.post_processing()
l.finish(**extra_locals)
Ejemplo n.º 2
0
from philologic.LoadOptions import LoadOptions
from philologic.Loader import Loader, setup_db_dir

# Read the site-wide PhiloLogic configuration as a Python module.
# NOTE(review): the `imp` module is deprecated in Python 3 and removed in
# 3.12 -- confirm which interpreter versions this script must support.
config_file = imp.load_source("philologic4", "/etc/philologic/philologic4.cfg")

# LC_ALL=C is exceedingly important to get a uniform sort order.
os.environ.update({"LC_ALL": "C", "PYTHONIOENCODING": "utf-8"})

if __name__ == '__main__':
    # Parse the command line and prepare the database / web app directories.
    load_options = LoadOptions()
    load_options.parse(sys.argv)
    db_destination = load_options["db_destination"]
    web_app_dir = load_options["web_app_dir"]
    setup_db_dir(db_destination, web_app_dir, force_delete=load_options.force_delete)

    # Database load
    l = Loader(**load_options.values)
    l.add_files(load_options.files)

    # Metadata comes from a bibliography file when one was supplied,
    # otherwise from the loader's own metadata parsing.
    if not load_options.bibliography:
        load_metadata = l.parse_metadata(load_options.sort_order, header=load_options.header)
    else:
        load_metadata = l.parse_bibliography_file(load_options.bibliography, load_options.sort_order)

    # Run the load stages in this fixed order.
    l.parse_files(load_options.cores, load_metadata)
    l.merge_objects()
    l.analyze()
    l.setup_sql_load()
    l.post_processing()
    l.finish()

    application_url = os.path.join(config_file.url_root, load_options.dbname)
    print("Application viewable at %s\n" % application_url)
Ejemplo n.º 3
0
try:
    os.mkdir(template_destination)
except OSError:
    print "The %s database already exists" % dbname
    print "Do you want to delete this database? Yes/No"
    choice = raw_input().lower()
    if choice.startswith('y'):
        os.system('rm -rf %s' % template_destination)
        os.mkdir(template_destination)
    else:
        sys.exit()
os.system("cp -r %s* %s" % (install_dir, template_destination))
os.system("cp %s.htaccess %s" % (install_dir, template_destination))
print "copied templates to %s" % template_destination

####################
## Load the files ##
####################

l = Loader(workers, filters=filters, tables=tables, clean=False)
l.setup_dir(data_destination, files)
l.parse_files(XPaths, Metadata_XPaths, token_regex, non_nesting_tags,
              self_closing_tags, pseudo_empty_tags)
l.merge_objects()
l.analyze()
l.make_tables()
l.finish(Philo_Types, Metadata_XPaths, db_url=db_url)
print >> sys.stderr, "done indexing."
print >> sys.stderr, "db viewable at " + db_url + "/dispatcher.py/form"
Ejemplo n.º 4
0
# Imported here because the file's import header is outside this section.
import glob
import subprocess

if template_dir:
    # Copy the web application templates into the database directory.
    # cp is run with an argument list rather than an interpolated shell
    # string, so paths containing spaces or shell metacharacters are passed
    # through verbatim.  glob performs the "*" expansion the shell used to do.
    # Like the shell, a bare "*" does not match names that start with a dot,
    # which is presumably why .htaccess is copied separately.
    template_entries = sorted(glob.glob(template_dir + "*"))
    subprocess.call(["cp", "-r", "--"] + template_entries + [db_destination])
    subprocess.call(["cp", "--", template_dir + ".htaccess", db_destination])


####################
## Load the files ##
####################

l = Loader(data_destination,
           Philo_Types,
           XPaths,
           Metadata_XPaths,
           filters,
           token_regex,
           non_nesting_tags,
           self_closing_tags,
           pseudo_empty_tags,
           debug=debug)
l.add_files(files)
filenames = l.list_files()
# One metadata record per file, in reverse filename order.
load_metadata = [{"filename": f} for f in sorted(filenames, reverse=True)]
# Run the load stages in this fixed order.
l.parse_files(workers, load_metadata)
l.merge_objects()
l.analyze()
l.make_tables(tables, *r_r_obj)
l.finish(**extra_locals)

# Parenthesised single-argument form prints the same text whether print is a
# statement or a function.
print("\nDone indexing.")
print("Your database is viewable at " + db_url + "\n")
Ejemplo n.º 5
0
if __name__ == '__main__':
    # Parse the command line and prepare the database / web app directories.
    load_options = LoadOptions()
    load_options.parse(sys.argv)
    setup_db_dir(load_options["db_destination"],
                 load_options["web_app_dir"],
                 force_delete=load_options.force_delete)

    # Database load
    l = Loader(**load_options.values)
    l.add_files(load_options.files)
    # Metadata comes from a bibliography file when one was supplied,
    # otherwise from the loader's own metadata parsing.
    if load_options.bibliography:
        load_metadata = l.parse_bibliography_file(load_options.bibliography,
                                                  load_options.sort_order)
    else:
        load_metadata = l.parse_metadata(load_options.sort_order,
                                         header=load_options.header)
    # Run the load stages in this fixed order.
    l.parse_files(load_options.cores, load_metadata)
    l.merge_objects()
    l.analyze()
    l.setup_sql_load()
    l.post_processing()
    l.finish()
    # Report any files the loader recorded in deleted_files.
    if l.deleted_files:
        print(
            "The following files were not loaded due to invalid data in the header:\n{}"
            .format("\n".join(l.deleted_files)))

    print("Application viewable at %s\n" %
          os.path.join(config_file.url_root, load_options.dbname))
Ejemplo n.º 6
0
try:
    os.mkdir(template_destination)
except OSError:
    print "The %s database already exists" % dbname
    print "Do you want to delete this database? Yes/No"
    choice = raw_input().lower()
    if choice.startswith('y'):
        os.system('rm -rf %s' % template_destination)
        os.mkdir(template_destination)
    else:
        sys.exit()
os.system("cp -r %s* %s" % (install_dir,template_destination))
os.system("cp %s.htaccess %s" % (install_dir,template_destination))
print "copied templates to %s" % template_destination


####################
## Load the files ##
####################

l = Loader(workers, filters=filters, tables=tables, clean=True)
l.setup_dir(data_destination,files)
l.parse_files(XPaths,Metadata_XPaths,token_regex,non_nesting_tags,self_closing_tags,pseudo_empty_tags)
l.merge_objects()
l.analyze()
l.make_tables()
l.finish(Philo_Types, Metadata_XPaths,db_url=db_url)
print >> sys.stderr, "done indexing."
print >> sys.stderr, "db viewable at " + db_url
Ejemplo n.º 7
0
## Load the files ##
####################

# Positional arguments for the Loader constructor.  The original author
# recorded this signature for reference (verify against the Loader class):
#   destination, token_regex=default_token_regex, xpaths=default_xpaths,
#   metadata_xpaths=default_metadata, filters=default_filters,
#   pseudo_empty_tags=[], suppress_tags=[], console_output=True,
#   log=False, debug=False
loader_args = (
    data_destination,
    token_regex,
    XPaths,
    Metadata_XPaths,
    filters,
    pseudo_empty_tags,
    suppress_tags,
)
l = Loader(*loader_args, default_object_level=default_object_level, debug=debug)

l.add_files(files)
filenames = l.list_files()
# Show the files the loader reports before parsing starts.
print(filenames)
# One metadata record per file, ordered by filename.
ordered_filenames = sorted(filenames)
load_metadata = [{"filename": fname} for fname in ordered_filenames]

# Run the load stages in this fixed order.
l.parse_files(workers, load_metadata)
l.merge_objects()
l.analyze()
l.make_tables(tables)
l.finish(post_filters, **extra_locals)

print("\nDone indexing.")
print("Your database is viewable at " + db_url + "\n")
Ejemplo n.º 8
0
data_destination = template_destination + "/data"

try:
    os.mkdir(template_destination)
except OSError:
    print "The %s database already exists" % dbname
    print "Do you want to delete this database? Yes/No"
    choice = raw_input().lower()
    if choice.startswith('y'):
        os.system('rm -rf %s' % template_destination)
        os.mkdir(template_destination)
    else:
        sys.exit()
os.system("cp -r %s* %s" % (install_dir, template_destination))
os.system("cp %s.htaccess %s" % (install_dir, template_destination))
print "copied templates to %s" % template_destination

####################
## Load the files ##
####################

l = Loader(workers, filters=filters, clean=True)
l.setup_dir(data_destination, files)
l.parse_files(XPaths, Metadata_XPaths, non_nesting_tags, self_closing_tags,
              pseudo_empty_tags)
l.merge_objects()
l.analyze()
l.make_tables()
l.finish(Philo_Types, Metadata_XPaths)
print >> sys.stderr, "done indexing."
Ejemplo n.º 9
0
####################
## Load the files ##
####################

# Keyword options are gathered separately; the positional arguments keep
# their original order.  The original author recorded this constructor
# signature for reference (verify against the Loader class):
#   destination, token_regex=default_token_regex, xpaths=default_xpaths,
#   metadata_xpaths=default_metadata, filters=default_filters,
#   pseudo_empty_tags=[], suppress_tags=[], console_output=True,
#   log=False, debug=False
loader_keywords = {
    "default_object_level": default_object_level,
    "debug": debug,
}
l = Loader(data_destination, token_regex, XPaths, Metadata_XPaths, filters,
           pseudo_empty_tags, suppress_tags, **loader_keywords)

l.add_files(files)
filenames = l.list_files()
# One metadata record per file, ordered by filename.
ordered_filenames = sorted(filenames)
load_metadata = [{"filename": fname} for fname in ordered_filenames]

# Run the load stages in this fixed order.
l.parse_files(workers, load_metadata)
l.merge_objects()
l.analyze()
l.make_tables(tables)
l.finish(post_filters, **extra_locals)

print("\nDone indexing.")
print("Your database is viewable at " + db_url + "\n")