Beispiel #1
0
def scan(strg):
    """
    check for extent of syntax specification

    arguments:
        strg  - string of chars to scan

    returns:
        char count > 0 on finding possible syntax specification, 0 otherwise
    """

    n = 0
    for c in strg:
        if c == '.' or ellyChar.isLetterOrDigit(c): n += 1
        else: break
    else:
        return n
    c = strg[n]
    if c == ' ': return n
    if c != '[': return 0
    k = featureSpecification.scan(strg[n:])
    return n + k if k > 0 else 0
Beispiel #2
0
def scan ( strg ):

    """
    check for extent of syntax specification

    arguments:
        strg  - string of chars to scan

    returns:
        char count > 0 on finding possible syntax specification, 0 otherwise
    """

    n = 0
    for c in strg:
        if c == '.' or ellyChar.isLetterOrDigit(c): n += 1
        else: break
    else:
        return n
    c = strg[n]
    if c == ' ': return n
    if c != '[': return 0
    k = featureSpecification.scan(strg[n:])
    return n + k if k > 0 else 0
def compile ( name , stb , defn ):

    """
    static method to create an Elly vocabulary database from text file input

    arguments:
        name  - for new SQLite database
        stb   - Elly symbol table
        defn  - Elly definition reader for vocabulary

    exceptions:
        TableFailure on error
    """

    global nerr
    nerr = 0
    cdb = None  # SQLite db connection
    cur = None  # SQLite db cursor

#   print 'compiled stb=' , stb

    if stb == None :
        print >> sys.stderr, 'no symbol table'
        raise ellyException.TableFailure

    try:
        zfs = FSpec(stb,'[$]',True).positive.hexadecimal(False)
    except ellyException.FormatFailure:              # should never need this
        print >> sys.stderr , 'unexpected failure with zero features'
        raise ellyException.TableFailure

#   print 'zfs=' , zfs               # hexadecimal for all features off

    tsave = ''                                       # original term
    dsave = ''                                       #          definition

    try:
        filn = name + vocabulary                     # where to put vocabulary database
        try:
            os.remove(filn)                          # delete the file if it exists
        except OSError:
            print >> sys.stderr , 'no' , filn        # if no such file, warn but proceed

#### SQLite
####
        try:
            cdb = dbs.connect(filn)                  # create new database
            cur = cdb.cursor()
            cur.execute("CREATE TABLE Vocab(Keyx TEXT, Defn TEXT)")
            cdb.commit()
        except dbs.Error , e:
            print >> sys.stderr , e
            raise ellyException.TableFailure         # give up on any database failure

#       print 'creating' , filn
#
####

        r = None                                          # for error reporting

        while True:                                       # process vocabulary definition records

            try:                                          # for catching FormatFailure exception
#               print '------------'
                r = defn.readline()                       # next definition
                if len(r) == 0: break                     # stop on EOF
#               print type(r) , r

                k = r.find(':')                           # look for first ':'
                if k < 0:
                    tsave = r
                    dsave = None
                    _err()                                # report error and quit entry

                t = r[:k].strip()                         # term to go into dictionary
                d = r[k+1:].strip()                       # its definition
                tsave = t                                 # save for any error reporting
                dsave = d                                 #

#               print ' tm=' , '<' + t + '>' , 'df=' , '<' + d + '>'
                if len(t) == 0 or len(d) == 0:
                    _err()                                # quit on missing parts
                c = t[0]
                if not ellyChar.isLetterOrDigit(c) and c != '.' and c != '"':
                    _err('bad term')

                n = delimitKey(t)                         # get part of term to index
                if n <= 0:
                    _err()                                # quit on bad term
                wky = toKey(t[:n])                        # key part of term to define
#               print '  SQLite key=' , wky

                ns = syntaxSpecification.scan(d)          # find extent of syntax info
#               print 'ns=' , ns
                if ns <= 0: _err('bad syntax specification')
#               print 'PoS=' , d[:ns]

                syn = d[:ns]                              # syntax info as string
                d = d[ns:].strip()                        # rest of definition

                try:
#                   print 'VT syn=' , syn
                    ss = SSpec(stb,syn)                   # decode syntax info
#                   print 'VT ss =' , ss
                except ellyException.FormatFailure:
                    _err('malformed syntax specification')
                cat = str(ss.catg)                        #   syntax category
                syf = ss.synf.positive.hexadecimal(False) #   syntactic flags
#               print 'syf=' , syf

                smf = zfs                                 # initialize defaults for
                pb = '0'                                  #   cognitive semantics
                cn = conceptualHierarchy.NOname           #

#               print '0:d=[' + d + ']'
                if len(d) > 1:                            # check for cognitive semantics
                    x = d[0]
                    if x == '[' or x == '0' or x == '-':  # semantic features?
                        if x != '[':                      # a '0' or '-' means to take default
                            if len(d) == 1 or d[1] != ' ':
                                _err('missing semantic features')
                            d = d[2:].strip()             # skip over
                        else:
                            ns = featureSpecification.scan(d) # look for ']' of features
#                           print 'ns=' , ns
                            if ns < 0:
                                _err()
                            sem = d[:ns]                  # get semantic features
                            d = d[ns:].strip()            # skip over
                            try:
#                               print 'smf=' , smf
                                fs = FSpec(stb,sem,True)
                            except ellyException.FormatFailure:
                                _err('bad semantic features')
                            smf = fs.positive.hexadecimal(False) # convert to hex

#                       print '1:d=[' + d + ']'
                        ld = len(d)
#                       print 'ld=' , ld
                        if ld == 0:
                            _err('missing plausibility')
                        np = 0
                        x = d[np]
                        if x == '+' or x == '-':
                            np += 1                       # take any plus or minus sign
                        while np < ld:                    # and successive digits
                            if ellyChar.isDigit(d[np]): np += 1
                            else: break
#                       print 'np=' , np
                        if np == 0:
                            _err('missing plausibility')
                        pb = d[:np]                       # plausibility bias
#                       print 'pb=' , pb
                        d = d[np:]
                        ld = len(d)
#                       print '2:d=[' + d + ']'
                        if ld > 1:                        # any more to process?
                            c = d[0]                      # get next char after bias
                            d = d[1:]                     # advance scan
                            ld -= 1
                            if c == '/':                  # check for explicit concept
#                               print 'getting concept'
                                np = 0
                                while np < ld:            # get extent of concept
                                    if ellyChar.isWhiteSpace(d[np]): break
                                    np += 1
                                if np == 0:
                                    _err('missing concept for plausibility')
                                cn = d[:np]               # extract concept
                                d = d[np:]
                            elif c != ' ':
                                _err()                    # signal bad format
                        elif ld > 0:
                            _err()                        # unidentifiable trailing text

                d = d.strip()                             # rest of definition
#               print 'rest of d=' , d
                if len(d) > 0 and d[-1] == '=':
                    if len(d) == 1 or d[0] != '=':
                        _err('incomplete definition')

                ld = [ ]                            # for normalizing definition

                k = 0                               # count spaces removed
                sd = ''                             # previous char seen
                for cd in d:                        # scan all chars in translation
                    if cd == ' ':
                        if sd == '=' or sd == ',' or sd == ' ':
                            k += 1
                            sd = cd
                            continue
                    elif cd == '=' or cd == ',':    # no spaces before '=' or ','
                        if sd == ' ':
                            k += 1
                            ld.pop()
                    if cd == ',':
                        if sd == '=':
                            _err('missing translation')
                        cd = '#'                    # format for PICK operation
                    elif cd == '=' and sd == '=':
                        print >> sys.stderr , '** WARNING \'=\' followed by \'=\''
                        print >> sys.stderr , '*  at [' , tsave , ']'

                    sd = cd
                    ld.append(cd)                   # add char to reformatted definition

                if k > 0:
                    d = ''.join(ld)                 # definition with spaces removed

#               print '3:d=[' + d + ']'

                vrc = [ t , ':' , cat , syf , smf ,
                        pb , cn ]                   # start data record
                vss = u' '.join(vrc)                # convert to string
                vss += u' ' + d                     # fill out record with rest of input
#               print 'type(vss)=' , type(vss)

#               print 'rec=' , vrc , 'tra=' , d
#               print '   =' , vss

            except ellyException.FormatFailure:
                print >> sys.stderr , '*  at [' , tsave ,
                if dsave != None:
                    print >> sys.stderr , ':' , dsave ,
                print >> sys.stderr , ']'
                continue                            # skip rest of processing

#### SQLite
####
            try:
                sql = "INSERT INTO Vocab VALUES(?,?)"
#               print type(wky) , wky , type(vss) , vss
                cur.execute(sql,(wky,vss))
            except dbs.Error , e:
                print >> sys.stderr , 'FATAL' , e
                sys.exit(1)
Beispiel #4
0
def build(name, stb, defn):
    """
    static method to create an Elly vocabulary database from text file input

    arguments:
        name  - for new SQLite database
        stb   - Elly symbol table
        defn  - Elly definition reader for vocabulary

    exceptions:
        TableFailure on error
    """

    global nerr
    nerr = 0
    cdb = None  # SQLite db connection
    cur = None  # SQLite db cursor

    #   print ( 'built stb=' , stb )

    if stb == None:
        print('no symbol table', file=sys.stderr)
        raise ellyException.TableFailure

    try:
        zfs = FSpec(stb, '[$]', True).positive.hexadecimal(False)
    except ellyException.FormatFailure:  # should never need this
        print('unexpected failure with zero features', file=sys.stderr)
        raise ellyException.TableFailure

#   print ( 'zfs=' , zfs )                           # hexadecimal for all features off

    tsave = ''  # original term
    dsave = ''  #          definition

    try:
        filn = name + vocabulary  # where to put vocabulary database
        try:
            os.remove(filn)  # delete the file if it exists
        except OSError:
            print('no', filn,
                  file=sys.stderr)  # if no such file, warn but proceed

#### SQLite DB operations
####
        try:
            cdb = dbs.connect(filn)  # create new database
            cur = cdb.cursor()
            cur.execute("CREATE TABLE Vocab(Keyx TEXT, Defn TEXT)")
            cdb.commit()
        except dbs.Error as e:
            print(e, file=sys.stderr)
            raise ellyException.TableFailure  # give up on any database failure

#       print ( 'creating' , filn )
#
####

        r = None  # for error reporting

        while True:  # process vocabulary definition records

            try:  # for catching FormatFailure exception
                #               print ( '------------' )
                r = defn.readline()  # next definition
                if len(r) == 0: break  # stop on EOF
                #               print ( type(r) , r )
                r = definitionLine.normalize(r)  #
                #               print ( 'to' , r )

                k = r.find(' : ')  # look for first ' : '
                if k < 0:
                    tsave = r
                    dsave = None
                    _err()  # report error and quit entry

                t = r[:k].strip()  # term to go into dictionary
                d = r[k + 2:].strip()  # its definition
                tsave = t  # save for any error reporting
                dsave = d  #

                #               print ( ' tm=' , '<' + t + '>' , 'df=' , '<' + d + '>' )
                if len(t) == 0 or len(d) == 0:
                    _err()  # quit on missing parts
                if ellyConfiguration.language == 'ZH':  # special key for Chinese
                    wky = toKeyZH(t[0])
                else:
                    c = t[0]
                    if not ellyChar.isLetterOrDigit(c) and not c in initChr:
                        _err('bad term')

                    n = delimitKey(t)  # get part of term to index
                    #                   print ( 'delimit=' , n )
                    if n <= 0:
                        _err()  # quit on bad term
                    wky = toKey(t[:n])  # key part of term to define

#               print ( '  SQLite key=' , wky )

#               print ( 'd=' , d )
                ns = syntaxSpecification.scan(d)  # find extent of syntax info
                #               print ( 'ns=' , ns , '"' + d[ns:] + '"' )
                if ns <= 0: _err('bad syntax specification')
                if not d[ns:] == '' and d[ns] != ' ':
                    _err('trailing chars in syntax specification')
                #               print ( 'PoS=' , d[:ns] )

                syn = d[:ns]  # syntax info as string
                d = d[ns:].strip()  # rest of definition

                try:
                    #                   print ( 'VT syn=' , syn )
                    ss = SSpec(stb, syn)  # decode syntax info
#                   print ( 'VT ss =' , ss )
                except ellyException.FormatFailure:
                    _err('malformed syntax specification')
                cat = str(ss.catg)  #   syntax category
                cid = _smfchk[ss.catg]  #   associated semantic feature ID
                syf = ss.synf.positive.hexadecimal(False)  #   syntactic flags
                #               print ( 'cat=' , cat )
                #               print ( 'syf=' , syf )

                smf = zfs  # initialize defaults for
                pb = '0'  #   cognitive semantics
                cn = conceptualHierarchy.NOname  #

                #               print ( '0:d=[' + d + ']' )
                if len(d) > 1:  # check for cognitive semantics
                    x = d[0]
                    if x == '[' or x == '0' or x == '-':  # semantic features?
                        if x != '[':  # a '0' or '-' means to take default
                            if len(d) == 1 or d[1] != ' ':
                                _err('missing semantic features')
                            d = d[2:].strip()  # skip over
                        else:
                            ns = featureSpecification.scan(
                                d)  # look for ']' of features
                            #                           print ( 'ns=' , ns )
                            if ns < 0:
                                _err()
                            sem = d[:ns]  # get semantic features
                            d = d[ns:].strip(
                            )  # skip over for subsequent processing

                            sid = sem[1]  # feature ID
                            if sid != cid:
                                if cid != None:
                                    _err('inconsistent semantic feature id')
                                _smfchk[ss.catg] = sid

                            try:
                                #                               print ( 'smf=' , smf )
                                fs = FSpec(stb, sem, True)
                            except ellyException.FormatFailure:
                                _err('bad semantic features')
                            smf = fs.positive.hexadecimal(
                                False)  # convert to hex

#                       print ( '1:d=[' + d + ']' )
                        ld = len(d)
                        #                       print ( 'ld=' , ld )
                        if ld == 0:
                            _err('missing plausibility')
                        np = 0
                        x = d[np]
                        if x == '+' or x == '-':
                            np += 1  # take any plus or minus sign
                        while np < ld:  # and successive digits
                            if ellyChar.isDigit(d[np]): np += 1
                            else: break
#                       print ( 'np=' , np )
                        if np == 0:
                            _err('missing plausibility')
                        pb = d[:np]  # plausibility bias
                        #                       print ( 'pb=' , pb )
                        d = d[np:]
                        ld = len(d)
                        #                       print ( '2:d=[' + d + ']' )
                        if ld > 1:  # any more to process?
                            c = d[0]  # get next char after bias
                            d = d[1:]  # advance scan
                            ld -= 1
                            if c == '/':  # check for explicit concept
                                #                               print ( 'getting concept' )
                                np = 0
                                while np < ld:  # get extent of concept
                                    if ellyChar.isWhiteSpace(d[np]): break
                                    np += 1
                                if np == 0:
                                    _err('missing concept for plausibility')
                                cn = d[:np]  # extract concept
                                d = d[np:]
                            elif c != ' ':
                                _err()  # signal bad format
                        elif ld > 0:
                            _err()  # unidentifiable trailing text
                    elif d[0] != '(':
                        dd = d
                        while ellyChar.isLetterOrDigit(dd[0]):
                            dd = dd[1:]
                        if len(dd) == 0 or dd[0] != '=':
                            _err()

                d = d.strip()  # rest of definition
                #               print ( 'rest of d=' , d )
                if len(d) > 0 and d[-1] == '=':
                    if len(d) == 1 or d[0] != '=':
                        _err('incomplete definition')

                ld = []  # for normalizing definition

                k = 0  # count spaces removed
                sd = ''  # previous char seen
                for cd in d:  # scan all chars in translation
                    #                   print ( 'cd=' , cd )
                    if cd == ' ':
                        if sd == '=' or sd == ',' or sd == ' ':
                            k += 1
                            sd = cd
                            continue
                    elif cd == '=' or cd == ',':  # no spaces before '=' or ','
                        if sd == ' ':
                            k += 1
                            ld.pop()
                    if cd == ',':
                        if sd == '=':
                            _err('missing translation')
                        cd = '#'  # format for PICK operation
                    elif cd == '=' and sd == '=':
                        print('** WARNING \'=\' followed by \'=\'',
                              file=sys.stderr)
                        print('*  at [', tsave, ']', file=sys.stderr)

                    sd = cd
                    ld.append(cd)  # add char to reformatted definition

#               print ( 'ld=' , ld )
                if k > 0:
                    d = ''.join(ld)  # definition with spaces removed

#               print ( '3:d=[' + d + ']' )

                vrc = [t, '=:', cat, syf, smf, pb, cn]  # start data record
                vss = ' '.join(vrc)  # convert to string
                vss += ' ' + d  # fill out record with rest of input
#               print ( 'type(vss)=' , type(vss) )

#               print ( 'rec=' , vrc , 'tra=' , d )
#               print ( '   =' , vss )

            except ellyException.FormatFailure:  # will catch exceptions from _err()
                print('*  at [', tsave, end=' ', file=sys.stderr)
                if dsave != None:
                    print(':', dsave, end=' ', file=sys.stderr)
                print(']', file=sys.stderr)
                continue  # skip rest of processing this rule

#### SQLite DB operation
####
            try:
                sql = "INSERT INTO Vocab VALUES(?,?)"
                #               print ( type(wky) , wky , type(vss) , vss )
                cur.execute(sql, (wky, vss))
            except dbs.Error as e:
                print('FATAL', e, file=sys.stderr)
                sys.exit(1)
#
####

#### SQLite DB operations
####
        if nerr == 0:
            cdb.commit()
        cdb.close()  # clean up


#       print ( 'DONE' )
#
####

    except Error as e:  # catch any other errors
        print('**', e, file=sys.stderr)
        print('*  at', r, file=sys.stderr)
        nerr += 1

    if nerr > 0:
        print('**', nerr, 'vocabulary table errors in all', file=sys.stderr)
        print('*  compilation FAILed', file=sys.stderr)
        cdb.close()  # discard any changes
        raise ellyException.TableFailure
Beispiel #5
0
def compile ( name , stb , defn , stem=None ):

    """
    static method to create an Elly vocabulary database from text file input

    arguments:
        name  - for new BSDDB database
        stb   - Elly symbol table
        defn  - Elly definition reader for vocabulary
        stem  - optional stemmer for indexing

    exceptions:
        TableFailure on error
    """

    global nerr
    nerr = 0

#   print >> sys.stderr , 'compiled stb=' , stb , 'stem=' , stem , 'db=' , db

    if stb == None :
        print >> sys.stderr, 'no symbol table'
        raise ellyException.TableFailure
    if db  == None :
        print >> sys.stderr, 'no Python db package'
        raise ellyException.TableFailure

    try:
        zfs = FSpec(stb,'[$]',True).positive.hexadecimal(False)
    except ellyException.FormatFailure:              # should never need this
        print >> sys.stderr , 'unexpected failure with zero features'
        raise ellyException.TableFailure

#   print >> sys.stderr , 'zfs=' , zfs               # hexadecimal for all features off

    tsave = ''                                       # original term
    dsave = ''                                       #          definition

    try:
        filn = name + vocabulary                     # where to put vocabulary database
        try:
            os.remove(filn)                          # delete the file if it exists
        except OSError:
            print >> sys.stderr , 'no' , filn
        dbs = db.DB()                                # create new database
        dbs.set_flags(db.DB_DUP)                     # keys may identify multiple records
        dbs.open(filn,None,db.DB_HASH,db.DB_CREATE)  # open new database file
#       print >> sys.stderr , 'creating' , filn

        r = None                                          # for error reporting

        while True:                                       # process vocabulary records

            try:
#               print >> sys.stderr , '------------'
                r = defn.readline()                       # next definition
                if len(r) == 0: break                     # stop on EOF
                if r[0] == '#': continue                  # skip comment line
#               print >> sys.stderr , 'def=' , r

                k = r.find(':')                           # look for first ':'
                if k < 0:
                    tsave = r
                    dsave = None
                    _err()                                # report error and quit entry
                    continue

                t = r[:k].strip()                         # term to go into dictionary
                d = r[k+1:].strip()                       # its definition
                tsave = t                                 # save for any error reporting
                dsave = d                                 #

#               print >> sys.stderr , ' tm=' , '<' + t + '>' , 'df=' , '<' + d + '>'
                if len(t) == 0 or len(d) == 0:
                    _err()                                # quit on missing parts
                    continue
                c = t[0]
                if not ellyChar.isLetterOrDigit(c) and c != '.' and c != '"':
                    _err('bad term')
                    continue

                n = toIndex(t)                            # get part of term to index
                if n == 0:
                    _err()                                # quit on bad term
                    continue
                w = t[:n]                                 # first word of term to define  
                if stem != None:
                    try:
                        w = stem.simplify(w)              # reduce for lookup key
                    except ellyException.StemmingError:
                        _err('bad stemming logic')
                        continue
#               print >> sys.stderr , '  w=' , w
                lcw = lcAN(w)                             # convert to ASCII lower case
#               print >> sys.stderr , 'lcw=' , '"' + lcw + '"'

                ns = syntaxSpecification.scan(d)          # find extent of syntax info
#               print >> sys.stderr , 'ns=' , ns
                if ns <= 0: _err('bad syntax specification')
#               print >> sys.stderr , 'PoS=' , d[:ns]

                syn = d[:ns]                              # syntax info as string
                d = d[ns:].strip()                        # rest of definition

                try:
#                   print >> sys.stderr , 'VT syn=' , syn
                    ss = SSpec(stb,syn)                   # decode syntax info to get
#                   print >> sys.stderr , 'VT ss =' , ss
                except ellyException.FormatFailure:
                    _err('malformed syntax specification')
                    continue
                cat = str(ss.catg)                        #   syntax category
                syf = ss.synf.positive.hexadecimal(False) #   syntactic flags
#               print >> sys.stderr , 'syf=' , syf

                smf = zfs                                 # initialize defaults for
                pb = '0'                                  #   cognitive semantics
                cn = '-'                                  #

#               print >> sys.stderr , '0:d=[' + d + ']'
                if len(d) > 1:                            # check for cognitive semantics
                    x = d[0]
                    if x == '[' or x == '0' or x == '-':  # semantic features?
                        if x != '[':                      # a '0' or '-' means to take default
                            if len(d) == 1 or d[1] != ' ':
                                _err('missing semantic features')
                                continue
                            d = d[2:].strip()             # skip over
                        else:
                            ns = featureSpecification.scan(d) # look for ']' of features
#                           print >> sys.stderr , 'ns=' , ns
                            if ns < 0:
                                _err()
                                continue
                            sem = d[:ns]                  # get semantic features
                            d = d[ns:].strip()            # skip over
                            try:
#                               print >> sys.stderr , 'smf=' , smf
                                fs = FSpec(stb,sem,True)
                            except ellyException.FormatFailure:
                                _err('bad semantic features')
                                continue
                            smf = fs.positive.hexadecimal(False) # convert to hex

#                       print >> sys.stderr , '1:d=[' + d + ']'
                        ld = len(d)
#                       print >> sys.stderr , 'ld=' , ld
                        if ld == 0:
                            _err('missing plausibility')
                            continue
                        np = 0
                        x = d[np]
                        if x == '+' or x == '-':
                            np += 1                       # take any plus or minus sign
                        while np < ld:                    # and successive digits
                            if ellyChar.isDigit(d[np]): np += 1
                            else: break
#                       print >> sys.stderr , 'np=' , np
                        if np == 0:
                            _err('missing plausibility')
                            continue
                        pb = d[:np]                       # plausibility bias
#                       print >> sys.stderr , 'pb=' , pb
                        d = d[np:]
                        ld = len(d)
#                       print >> sys.stderr , '2:d=[' + d + ']'
                        if ld > 1:                        # any more to process?
                            c = d[0]                      # get next char after bias
                            d = d[1:]                     # advance scan
                            ld -= 1
                            if c == '/':                  # check for explicit concept
#                               print >> sys.stderr , 'getting concept'
                                np = 0
                                while np < ld:            # get extent of concept
                                    if ellyChar.isWhiteSpace(d[np]): break
                                    np += 1
                                if np == 0:
                                    _err('missing concept for plausibility')
                                    continue
                                cn = d[:np]               # extract concept
                                d = d[np:]
                            elif c != ' ':
                                _err()                    # signal bad format
                                continue
                        elif ld > 0:
                            _err()                        # unidentifiable trailing text
                            continue

                d = d.strip()                             # rest of definition
#               print 'rest of d=' , d
                if len(d) > 0 and d[-1] == '=':
                    if len(d) == 1 or d[0] != '=':
                        _err('incomplete definition')
                        continue

                ld = [ ]                            # for normalizing definition

                k = 0                               # count spaces removed
                sd = ''                             # previous char seen
                for cd in d:                        # scan all chars in translation
                    if cd == ' ':
                        if sd == '=' or sd == ',' or sd == ' ':
                            k += 1
                            sd = cd
                            continue
                    elif cd == '=' or cd == ',':    # no spaces before '=' or ','
                        if sd == ' ':
                            k += 1
                            ld.pop()
                    if cd == ',':
                        if sd == '=':
                            _err('missing translation')
                        cd = '#'                    # format for PICK operation
                    elif cd == '=' and sd == '=':
                        print >> sys.stderr , '** WARNING \'=\' followed by \'=\''
                        print >> sys.stderr , '*  at [' , tsave , ']'

                    sd = cd
                    ld.append(cd)                   # add char to reformatted definition

                if k > 0:
                    d = ''.join(ld)                 # definition with spaces removed

#               print >> sys.stderr , '3:d=[' + d + ']'

                vrc = [ t , ':' , cat , syf , smf ,
                        pb , cn ]                         # start BdB data record
                vss = u' '.join(vrc)                      # convert to string
                vss += u' ' + d                           # fill out record with rest of input
#               print >> sys.stderr , 'type(vss)=' , type(vss)
                rss = vss.encode('utf8')                  # convert to UTF-8

#               print >> sys.stderr , 'rec=' , vrc , 'tra=' , d
#               print >> sys.stderr , '   =' , rss

            except ellyException.FormatFailure:
                print >> sys.stderr , '*  at [' , tsave ,
                if dsave != None:
                    print >> sys.stderr , ':' , dsave ,
                print >> sys.stderr , ']'
                continue

#           print >> sys.stderr , 'lcw=' , lcw
            dbs.put(lcw,rss)                          # save in database
#           print >> sys.stderr , 'saved'

#       print >> sys.stderr , 'DONE'
        dbs.close()                                   # clean up

    except StandardError , e:                         # catch any other errors
        print >> sys.stderr , '**' , e
        print >> sys.stderr , '*  at' , r
        nerr += 1