def pluralize_units(time_units):
    """
        Add a plural alias for every unit name in the given dict.

        Each existing key is passed through ``pluralize`` and the result is
        stored with the same value as the original key. The dict is
        modified in place and is also returned.
    """
    plural_aliases = dict(
        (pluralize(unit_name), unit_value)
        for unit_name, unit_value in time_units.items()
    )
    time_units.update(plural_aliases)
    return time_units
def pluralize_units(time_units):
    """
        Extend the given dict of time units with plural entries.

        For every key already present, the pluralized key (as produced by
        ``pluralize``) is added with the same value. The dict is updated in
        place and returned.
    """
    additions = {}
    for singular, value in time_units.items():
        additions[pluralize(singular)] = value
    # Apply the additions only after iterating, so the dict is never
    # resized while it is being walked.
    time_units.update(additions)
    return time_units
# Example #3
0
def article(i, t="type"):
    """
    Return the name stored under key ``t`` prefixed with a fitting determiner.

    When ``len(i)`` is not 1, ``i`` is indexed as a sequence and the result
    is "some <plural of i[0][t]>". Otherwise ``i[t]`` is used directly:
    'pants' and 'glasses' are returned without any article, names that
    start with a lowercase vowel get "an", everything else gets "a".
    """
    if len(i) != 1:
        return "some %s" % pluralize(i[0][t])

    name = i[t]
    if name in ('pants', 'glasses'):
        return name

    determiner = "an" if name[0] in ('a', 'e', 'i', 'o', 'u') else "a"
    return "%s %s" % (determiner, name)
# Example #4
0
    def get(self):
        """
        Build the SELECT statement for this object and return it.

        The statement is assembled from the class name (pluralized for the
        table name), ``self._has_one``, ``self._has_many``,
        ``self._has_many_to_many``, ``self._select`` and
        ``self._select_related``. It is stored on ``self._resulting_query``
        before ``self._build_where()`` is called, and the value of
        ``self._resulting_query`` after that call is returned.

        :returns: the query string held in ``self._resulting_query``
        """
        this_single = type(self).__name__
        this_plural = pluralize(this_single)

        # --- column list -------------------------------------------------
        columns = ["%s.id AS %s_id" % (this_plural, this_single)]
        for tbl in self._has_one:
            tbl_single = type(tbl).__name__
            columns.append("%s.id AS %s_id" % (pluralize(tbl_single),
                                               tbl_single))
        if self._select:
            for column, alias in self._select.items():
                # Every entry becomes its own list item so that columns
                # without an alias are still comma-separated from the
                # next one.
                if alias:
                    columns.append("%s AS %s" % (column, alias))
                else:
                    columns.append(column)
            columns.extend(self._select_related)
        else:
            columns.append("*")

        # --- joins -------------------------------------------------------
        joins = []
        for tbl in self._has_many:
            tbl_plural = pluralize(type(tbl).__name__)
            # The FROM clause names the pluralized table, so the join
            # condition must reference that same name.
            joins.append("INNER JOIN %s ON %s = %s" % (
                tbl_plural,
                tbl_plural + "." + this_single + "_id",
                this_plural + ".id"))
        for tbl in self._has_one:
            tbl_single = type(tbl).__name__
            tbl_plural = pluralize(tbl_single)
            joins.append("INNER JOIN %s ON %s = %s" % (
                tbl_plural,
                this_plural + "." + tbl_single + "_id",
                tbl_plural + ".id"))
        for tbl in self._has_many_to_many:
            tbl_single = type(tbl).__name__
            tbl_plural = pluralize(tbl_single)
            # Join table is named <ThisClass>_<OtherClassPlural>.
            join_tbl = this_single + "_" + tbl_plural
            joins.append("INNER JOIN %s ON %s = %s" % (
                join_tbl,
                join_tbl + "." + this_single + "_id",
                this_plural + ".id"))
            joins.append("INNER JOIN %s ON %s = %s" % (
                tbl_plural,
                join_tbl + "." + tbl_single + "_id",
                tbl_plural + ".id"))

        query = "SELECT " + ", ".join(columns) + " "
        query += "FROM " + this_plural + " "
        query += "".join(join + " " for join in joins)

        # _build_where() works on self._resulting_query, so publish the
        # statement before calling it.
        self._resulting_query = query
        self._build_where()

        return self._resulting_query
    def create_additional_indices(self):
        print "creating indices for creator and contributors"
        try:
            cur = self.db.cursor()
            sql = "CREATE INDEX {0}_index ON gutenberg_{1}({0});"

            for name in self.AUX_COLUMN_NAMES + ['file']:
                cur.execute(sql.format(name, pluralize(name)))
            self.db.commit()
        except:
            self.db.rollback()
            raise
 def test_pluralize(self):
     """Check pluralize() on word lists against the expected result sets."""
     cases = [
         (["cow", "pig", "cow", "cow"], {"cows", "pig"}),
         (["table", "table", "table"], {"tables"}),
         (["chair", "pencil", "arm"], {"chair", "pencil", "arm"}),
         (["list"], {"list"}),
         (
             ["set", "set", "tuple", "tuple"] + ["string"] * 4 + ["integer"],
             {"sets", "tuples", "strings", "integer"},
         ),
     ]
     for words, expected in cases:
         self.assertEqual(pluralize(words), expected)
    def create_additional_indices(self):
        print "creating indices for creator and contributors"
        try:
            cur = self.db.cursor()
            sql = "CREATE INDEX {0}_index ON gutenberg_{1}({0});"

            for name in self.AUX_COLUMN_NAMES + ['file']:
                cur.execute(sql.format(name, pluralize(name)))
            self.db.commit()
        except:
            self.db.rollback()
            raise
# Example #8
0
    def save(self, force=False):
        """
        Build the SQL statement that persists this object and return it.

        The table name is the pluralized class name. The columns are the
        names in ``self._select`` that do not start with an underscore; the
        values are read from the attributes of the same name.

        If the object has a ``<ClassName>_id`` attribute that is not None,
        an UPDATE is produced: ``self._build_where()`` is called and the
        id condition is appended with AND or WHERE as appropriate.
        Otherwise an INSERT is produced.

        The statement is only built, not executed.

        NOTE(review): values are interpolated into the SQL text without
        escaping. If they can come from untrusted input, this needs
        parameter binding at the point where the statement is executed.

        :param force: not used by this method
        :returns: the statement, also stored in ``self._resulting_query``
        """
        class_name = type(self).__name__
        tblname = pluralize(class_name)

        record_id = None
        id_attr = class_name + "_id"
        if id_attr in dir(self):
            record_id = getattr(self, id_attr)

        propdict = {}
        for prop in self._select:
            if prop.startswith('_'):
                continue
            propdict[prop] = getattr(self, prop)

        if record_id is not None:
            assignments = []
            for k, v in propdict.items():
                # Only exact int and str values are written; other types
                # are skipped.
                if type(v) is int:
                    # %-formatting converts the int; plain string
                    # concatenation would raise TypeError here.
                    assignments.append("%s = %s" % (k, v))
                elif type(v) is str:
                    assignments.append("%s = '%s'" % (k, v))
            statement = "UPDATE " + tblname + " SET " + ", ".join(assignments)
            self._resulting_query = statement.rstrip() + " "
            self._build_where()
            if "WHERE" in self._resulting_query:
                self._resulting_query += "AND " + tblname + ".id = " + str(record_id) + " "
            else:
                self._resulting_query += "WHERE " + tblname + ".id = " + str(record_id) + " "
        else:
            columns = []
            values = []
            # Walk keys and values together so each value stays paired with
            # its column.
            for key, val in propdict.items():
                columns.append(key)
                if type(val) is int:
                    values.append(str(val))
                else:
                    values.append("'" + val + "'")
            # Commas are inserted between items only, so no trailing
            # separator has to be stripped afterwards.
            self._resulting_query = (
                "INSERT INTO " + tblname + " "
                + "(" + ",".join(columns) + " "
                + ") VALUES (" + ",".join(values) + " "
                + ")")
        return self._resulting_query
def test_es_word_pluralization():
    """Words that take an 'es' suffix."""
    expected_plurals = [("box", "boxes"), ("guess", "guesses")]
    for singular, plural in expected_plurals:
        assert pluralize(singular) == plural
def test_s_words():
    """Words that simply take an 's' suffix."""
    expected_plurals = [("cat", "cats"), ("dog", "dogs")]
    for singular, plural in expected_plurals:
        assert pluralize(singular) == plural
def test_irregular_words():
    """Irregular plurals, including words whose plural equals the singular."""
    expected_plurals = [
        ("ox", "oxen"),
        ("goose", "geese"),
        ("moose", "moose"),
        ("deer", "deer"),
    ]
    for singular, plural in expected_plurals:
        assert pluralize(singular) == plural
def test_words_with_double_consonants():
    """The final consonant is doubled before the 'es' suffix."""
    plural = pluralize("gas")
    assert plural == "gasses"
class GutenbergDbCreator:
    # schema dictionary format:
    #   { TABLE_NAME : [ (COL1, DEF1), (COL2, DEF2), ... ] }
    # column names selected to match record keys from rdf_parser
    # NOT NULL designation on PRIMARY KEY seems to help sqlite generate a key when inserting
    MAIN_TABLE_SCHEMA = {
        'gutenberg_books': [('textId', 'TEXT PRIMARY KEY NOT NULL'),
                            ('title', 'TEXT'), ('friendlytitle', 'TEXT'),
                            ('downloads', 'INT'),
                            ('title_order', 'INT UNIQUE')]
    }
    AUX_COLUMN_NAMES = [
        'contributor', 'creator', 'subject', 'language', 'category'
    ]
    # each aux table one column which contains a unique value. The schema can be generated from the aux column list.
    AUX_TABLE_SCHEMA = {
        "gutenberg_%s" % pluralize(name):
        [('id', 'INTEGER PRIMARY KEY NOT NULL'), (name, 'TEXT UNIQUE')]
        for name in AUX_COLUMN_NAMES
    }
    FILE_TABLE_SCHEMA = {
        "gutenberg_files":
        [('id', 'INTEGER PRIMARY KEY NOT NULL'), ('file', 'TEXT UNIQUE'),
         ('format', 'TEXT'),
         ('textId',
          'TEXT REFERENCES gutenberg_books(textId) ON DELETE CASCADE')]
    }

    def __init__(self, filename, debug=False):
        """
        Create/recreate the schema

        :param filename: database filename
        :param debug: boolean whether to enable verbose output
        """
        self.db = sqlite3.connect(filename)
        self.debug = debug

        # create a look dict so we can find the mapping table given the auxiliary table name
        self.mapping_table_lookup = {}

        self._create_table_from_schema(self.MAIN_TABLE_SCHEMA)
        self._create_table_from_schema(self.AUX_TABLE_SCHEMA)
        self._create_table_from_schema(self.FILE_TABLE_SCHEMA)
        self._create_many2many_tables(self.MAIN_TABLE_SCHEMA,
                                      self.AUX_TABLE_SCHEMA)

        # add columns for download count associated with books liked to creators and contributors
        self._add_downloads_column("gutenberg_creators")
        self._add_downloads_column("gutenberg_contributors")

    def _create_table_from_schema(self, schema_map):
        def collect_columns_from_schema(col_schema):
            """ Return comma separated columns and def for SQL create """
            return ','.join([
                ' '.join(col_name_and_type) for col_name_and_type in col_schema
            ])

        for table_name, schema in schema_map.items():
            print "creating table " + table_name
            self.db.execute("DROP TABLE IF EXISTS %s" % table_name)
            sql_create = "CREATE TABLE %s(%s)" % (
                table_name, collect_columns_from_schema(schema))
            if self.debug:
                print sql_create
            self.db.execute(sql_create)

    def _create_many2many_tables(self, main_schema, aux_schemas):
        """
        Create many-to-many tables based on supplied schema dicts.
        Many to many tables are created specially.  The main table has a many-to-many relationship with
        each auxiliary table.
        Also populates the mapping lookup table.
        :param main_schema: schema description dict as described above. Expected to have one table.
        :param aux_schemas: schema description dict as described above. Likely to have many tables.
        """
        # Autogeneration of mapping assumes there is only one main table again which all auxiliary table are mapped
        assert (len(main_schema) == 1)
        CREATE_TABLE_TEMPLATE = (
            "CREATE TABLE {table_name}({book_id_name} INT REFERENCES {main_table_name}(textId) ON DELETE CASCADE, "
            "{aux_id_name} INT REFERENCES {aux_table_name}(id) ON DELETE CASCADE, PRIMARY KEY({book_id_name},{aux_id_name}))"
        )
        for main_table_name in main_schema:
            for aux_table_name, aux_cols in aux_schemas.items():
                # Assumed aux tables only contain two columns, the second of which describes the unique content
                column_name = self._get_aux_table_unique_column_name(aux_cols)
                table_name = "{0}_{1}_map".format(main_table_name, column_name)
                print "creating many-to-many mapping table " + table_name

                # column names
                book_id_name = "book_id"
                aux_id_name = "%s_id" % column_name

                self.db.execute("DROP TABLE IF EXISTS %s" % table_name)
                sql_create = CREATE_TABLE_TEMPLATE.format(
                    table_name=table_name,
                    main_table_name=main_table_name,
                    book_id_name=book_id_name,
                    aux_table_name=aux_table_name,
                    aux_id_name=aux_id_name)
                if self.debug:
                    print sql_create
                self.db.execute(sql_create)

                # record association between mapping table and auxiliary table
                self.mapping_table_lookup[aux_table_name] = {
                    table_name: [book_id_name, aux_id_name]
                }

    def _add_downloads_column(self, table):
        try:
            cur = self.db.cursor()
            cur.execute("ALTER TABLE %s ADD COLUMN downloads INTEGER;" % table)
            self.db.commit()
        except:
            self.db.rollback()
            raise

    def _get_aux_table_unique_column_name(self, aux_cols):
        """
        Return the unique value column name
        Auxiliary tables have typically have two columns, an id and a unique value column. Given
        an array of column definitions, this returns the column name of the unique column.
        :param aux_cols:   list of (colname, coldef) pairs
        :returns: colname of column with UNIQUE in column definition.
        """
        assert (len(aux_cols) == 2)
        for column_name, column_def in aux_cols:
            if column_def.find("UNIQUE") != -1:
                return column_name
        assert (
            False
        )  # The aux tables contain list of unique values -- one of the columns should be defined unique!

    def _create_insert_sql(self, table_name, col_schema):
        """
        Create an insert statement for use by main and auxiliary tables
        Omits id so it will be auto generated. (Note that books primary key changed
        so provided explicitly but column name is not 'id'.)
        Omit title_order because values will be added to it after all data is populated
        """
        excluded_columns = ['id', 'title_order']
        return "INSERT INTO %s (%s) VALUES (%s)" % (table_name, ','.join([
            name for (name, _) in col_schema if name not in excluded_columns
        ]), ','.join([
            ":" + name
            for (name, _) in col_schema if name not in excluded_columns
        ]))

    def _insert_mapping_from_book_to_aux(self, cursor, aux_table_name, book_id,
                                         aux_id):
        """
        Inserts the many-to-many association between book_id and aux_id
        """
        assert (len(self.mapping_table_lookup[aux_table_name]) == 1
                )  # expects one mapping table else must revise
        for map_table_name, map_col_names in self.mapping_table_lookup[
                aux_table_name].items():
            (book_id_name, aux_id_name) = map_col_names
            insertSql = "INSERT INTO {0} ({1},{2}) VALUES (:{1}, :{2})".format(
                map_table_name, book_id_name, aux_id_name)
            values = {book_id_name: book_id, aux_id_name: aux_id}
            if self.debug:
                print insertSql, values
            cursor.execute(insertSql, values)

    def _select_id_or_insert(self, cursor, selectSql, insertSql, col_name,
                             value):
        """
        Auxiliary tables contain unique values so return the id for the value if it already has been inserted
        If it hasn't been inserted, do so and return the new id.
        :param cursor:      working database cursor
        :param selectSql:   sql select statement ready for execution with supplied value. table name already embedded.
        :param insertSql:   sql insert statement ready for execution with supplied value. table name already embedded.
        :param col_name:    column name associated with the value
        :param value:       unicode string to be put in the database
        :returns:           id of value in the table
        """
        # check if value already in table
        aux_id = cursor.execute(selectSql, (value, )).fetchone()
        if isinstance(aux_id, tuple):
            (aux_id, ) = aux_id
        if aux_id is None:
            cursor.execute(insertSql, {'id': None, col_name: value})
            aux_id = cursor.lastrowid
        return aux_id

    def is_book_description(self, record):
        assert record['record_type'] in ['DESCRIPTION', 'FILE']
        return record['record_type'] == 'DESCRIPTION'

    def add_record(self, record, cursor):
        """
        :param record:      rdf parser record format
        :param cursor:      If pending transaction, database cursor to use.  May be None
        """
        cursor_owner = cursor is None
        if cursor is None:
            cursor = self.db.cursor()

        assert (len(self.MAIN_TABLE_SCHEMA) == 1
                )  # expect only one main table, else book_id is not unique

        # id should be null so that it will be autogenerated
        assert ('id' not in record)
        #record['id'] = None

        if self.is_book_description(record):
            # insert main book entry
            for table_name, col_schema in self.MAIN_TABLE_SCHEMA.items():
                insertSql = self._create_insert_sql(table_name, col_schema)

                # some titles include multiple entries (perhaps because of translations)
                # let's combine them into one title separated by slashes
                for cname in ['title', 'friendlytitle']:
                    v = record[cname]
                    if isinstance(v, (list, tuple)):
                        record[cname] = u' / '.join(v)

                if self.debug:
                    print insertSql, record
                cursor.execute(insertSql, record)
                book_id = record[
                    'textId']  # used below when inserting fields into aux tables

            # insert aux entries.  more than one data value may exist for each column in a record
            for table_name, col_schema in self.AUX_TABLE_SCHEMA.items():
                insertSql = self._create_insert_sql(table_name, col_schema)
                if self.debug:
                    print insertSql
                col_name = self._get_aux_table_unique_column_name(col_schema)
                selectSql = "SELECT id FROM %s WHERE %s=?" % (table_name,
                                                              col_name)
                values = record[col_name]
                # if list of values insert each in turn
                if not isinstance(values, basestring):
                    for value in values:
                        aux_id = self._select_id_or_insert(
                            cursor, selectSql, insertSql, col_name, value)
                        self._insert_mapping_from_book_to_aux(
                            cursor, table_name, book_id, aux_id)
                # otherwise insert the single value
                else:
                    if self.debug:
                        print selectSql, values
                    aux_id = self._select_id_or_insert(cursor, selectSql,
                                                       insertSql, col_name,
                                                       values)
                    self._insert_mapping_from_book_to_aux(
                        cursor, table_name, book_id, aux_id)
        else:  # record_type is FILE type
            for table_name, col_schema in self.FILE_TABLE_SCHEMA.items():
                # if format is list, merge to newline delimited field
                if not isinstance(record['format'], basestring):
                    record['format'] = u"\n".join(record['format'])
                insertSql = self._create_insert_sql(table_name, col_schema)
                cursor.execute(insertSql, record)

        if cursor_owner:
            self.db.commit()

    def add_many_records(self, record_list):
        """
        Creates transaction and iterates through record list inserting values
        """
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w',
                               0)  # don't buffer stdout

        cursor = self.db.cursor()
        print "Bulk adding records"
        try:
            for count, record in enumerate(record_list):
                self.add_record(record, cursor)
                if (count % 10000) == 0:
                    print count,
            print "\n%d records added." % (count + 1)
            print "Removing books which have all files filtered out..."

            # Now remove books for which we don't have any files records. TODO: Verify that a lack of
            # file entries is always a result of our filtering.  If we need to discover files through
            # some means other than a file record in the gutenberg index this may need to change.
            # TODO: Remove aux and mapping table entries associated with removed books.
            (number_of_books_without_files, ) = cursor.execute(
                'select count(BOOKS.textId) from gutenberg_books as BOOKS where (select count(*) from gutenberg_files as FILES where FILES.textId=BOOKS.textId) == 0;'
            ).fetchone()
            print "Number of books without files %d." % number_of_books_without_files
            removed_count = cursor.execute(
                'delete from gutenberg_books where(select count(*) from gutenberg_files as FILES where FILES.textId=gutenberg_books.textId)==0'
            ).rowcount
            print "Removed %d records." % removed_count
            if number_of_books_without_files != removed_count:
                print "WARNING: NUMBER OF RECORDS REMOVED DOES NOT MATCH NUMBER OF BOOKS WITHOUT FILES"
            print "Committing..."
        finally:
            # always commit rather than rollback if sqlite3.DatabaseError because easier to debug
            self.db.commit()

    def create_custom_title_order_index(self):
        print "Populating title_order column using lowercase title without punctuation"
        remove_punctuation_map = dict(
            (ord(char), None) for char in string.punctuation)

        try:
            cur = self.db.cursor()
            cur.execute("SELECT textId, title FROM gutenberg_books;")
            data = cur.fetchall()
            for (index, row) in enumerate(
                    sorted(data,
                           key=lambda row: row[1].lower().translate(
                               remove_punctuation_map))):
                (textId, _) = row
                cur.execute(
                    "UPDATE gutenberg_books SET title_order=? WHERE textId=?",
                    [index, textId])

            cur.execute(
                "CREATE INDEX title_index ON gutenberg_books (title_order);")
            self.db.commit()
            print "completed"
        except:
            self.db.rollback()
            raise

    def compute_author_downloads(self):
        def get_temp_insert_sql(aux_table, map_table, aux_colname):
            return 'INSERT INTO temp_counts (id, downloads) SELECT aux.id, SUM(book.downloads) FROM gutenberg_books as book, {0} as aux, {1} as map WHERE aux.id=map.{2}_id and book.textId=map.book_id GROUP BY aux.{2};'.format(
                aux_table, map_table, aux_colname)

        def get_update_sql(aux_table):
            return 'UPDATE {0} SET downloads=(SELECT t.downloads from temp_counts as t where t.id={0}.id);'.format(
                aux_table)

        def update(cursor, aux_table, map_table, aux_colname):
            insert_sql = get_temp_insert_sql(aux_table, map_table, aux_colname)
            update_sql = get_update_sql(aux_table)
            cursor.execute('delete from temp_counts;')
            cursor.execute(insert_sql)
            cursor.execute(update_sql)

        print "stored downloads per creator/contributor for sorting"
        try:
            cur = self.db.cursor()
            cur.execute(
                'CREATE TEMP TABLE temp_counts (id int primary key, downloads int);'
            )
            update(cur, 'gutenberg_creators', 'gutenberg_books_creator_map',
                   'creator')
            update(cur, 'gutenberg_contributors',
                   'gutenberg_books_contributor_map', 'contributor')
            self.db.commit()
        except:
            self.db.rollback()
            raise

    def create_additional_indices(self):
        print "creating indices for creator and contributors"
        try:
            cur = self.db.cursor()
            sql = "CREATE INDEX {0}_index ON gutenberg_{1}({0});"

            for name in self.AUX_COLUMN_NAMES + ['file']:
                cur.execute(sql.format(name, pluralize(name)))
            self.db.commit()
        except:
            self.db.rollback()
            raise
import sys
import json
import os
from pluralize import pluralize

# Read the unit relations from the JSON file named by the first command-line
# argument. The with-block closes the file once it has been parsed.
with open(sys.argv[1]) as relations_file:
    relations = json.load(relations_file)

# Map every lowercased unit name, and its plural, to the original spelling.
# assumes each value is a sequence whose third element is a unit name
# -- TODO confirm against the input file format
result = {}
for unit, base in relations.items():
    for name in (unit, base[2]):
        bare = name.lower()
        # first spelling seen wins; later duplicates are ignored
        if bare not in result:
            result[bare] = name
            result[pluralize(bare)] = name

print(json.dumps(result, indent=2))