コード例 #1
0
ファイル: coreData.py プロジェクト: cmerwich/bhsa
# Path of the `.mql` file named after CORE_NAME, below thisTempSource.
mqlFile = '{}/{}.mql'.format(thisTempSource, CORE_NAME)
# `tf` directory below thisTemp.
thisTempTf = '{}/tf'.format(thisTemp)

# Version-specific `tf` directory below thisRepo.
thisTf = '{}/tf/{}'.format(thisRepo, VERSION)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[4]:

if SCRIPT:
    # Build the path of the file to check once and reuse it in the call
    # below, instead of formatting the same string a second time.
    testFile = '{}/.tf/otype.tfx'.format(thisTf)
    # mqlzFile is defined outside this excerpt.
    # utils.mustRun yields a (good, work) pair; presumably it compares the
    # two files to decide whether there is work to do -- confirm in utils.
    (good, work) = utils.mustRun(mqlzFile, testFile, force=FORCE)
    # `good` falsy: report failure; `work` falsy: report success.
    # stop() is expected to end the script -- confirm in its definition.
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # TF Settings
#
# We add some custom information here.
#
# * the MQL object type that corresponds to the TF slot type, typically `word`;
# * a piece of metadata that will go into every feature; the time will be added automatically
# * suitable text formats for the `otext` feature of TF.
#
# The oText feature is very sensitive to what is available in the source MQL.
# It needs to be configured here.
# We save the configs we need per source and version.
コード例 #2
0
# Make sure the notes directory exists.
# `exist_ok=True` replaces the separate os.path.exists() test, which could
# race with another process creating the directory in between.
# Note: this raises FileExistsError if thisNotes exists but is not a directory.
os.makedirs(thisNotes, exist_ok=True)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[6]:

# In[13]:

if SCRIPT:
    # Path of the `sense` feature file below thisTf that is handed to mustRun.
    senseFeaturePath = "{}/.tf/{}.tfx".format(thisTf, "sense")
    good, work = utils.mustRun(None, senseFeaturePath, force=FORCE)
    # `good` falsy: report failure; `work` falsy: report success.
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # Loading the feature data
#
# We load the features we need from the BHSA core database and from the valence module,
# as far as generated by the
# [enrich](https://github.com/ETCBC/valence/blob/master/programs/enrich.ipynb) notebook.

# In[7]:

# In[14]:
コード例 #3
0
# All non-empty keys of metaData, in sorted order.
newFeatures = [name for name in metaData if name != '']
newFeatures.sort()
# The same names as one space-separated string.
newFeaturesStr = ' '.join(newFeatures)

utils.caption(0, '{} languages ...'.format(len(newFeatures)))

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[16]:

if SCRIPT:
    # The first new feature (in sorted order) supplies the file path
    # that is handed to mustRun.
    firstFeaturePath = '{}/.tf/{}.tfx'.format(thisTf, newFeatures[0])
    good, work = utils.mustRun(None, firstFeaturePath, force=FORCE)
    # `good` falsy: report failure; `work` falsy: report success.
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # Load existing data

# In[17]:

utils.caption(4, 'Loading relevant features')

# Set up Fabric on the data in thisTf and request the `book` feature.
TF = Fabric(locations=thisTf, modules=[''])
api = TF.load('book')
# Hands this module's globals to the API; presumably this injects the API
# members as global names -- confirm in the Text-Fabric documentation.
api.makeAvailableIn(globals())

# Starts out empty; where it gets filled is not visible in this excerpt.
nodeFeatures = {}
コード例 #4
0
ファイル: lexicon.py プロジェクト: OsvaldoJ/bhsa
# Version-specific `tf` directory below thisRepo.
thisTf = '{}/tf/{}'.format(thisRepo, VERSION)

# In[5]:

# Name of the feature whose file is used in the check below.
testFeature = 'lex0'

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[6]:

if SCRIPT:
    # Path of the test feature's file below thisTf, handed to mustRun.
    testFeaturePath = '{}/.tf/{}.tfx'.format(thisTf, testFeature)
    good, work = utils.mustRun(None, testFeaturePath, force=FORCE)
    # `good` falsy: report failure; `work` falsy: report success.
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # TF Settings
#
# * a piece of metadata that will go into these features; the time will be added automatically
# * new text formats for the `otext` feature of TF, based on lexical features.
#   We select the version specific otext material,
#   falling back on a default if nothing appropriate has been specified in oText.
#
# We do not do this for the older versions `4` and `4b`.

# In[7]:
コード例 #5
0
# In[3]:

# Gzipped SQL file for passageDb below thisMysql.
mysqlZFile = "{}/{}.sql.gz".format(thisMysql, passageDb)
# Plain SQL file for passageDb below thisTempMysql.
mysqlFile = "{}/{}.sql".format(thisTempMysql, passageDb)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[4]:

if SCRIPT:
    # The gzipped SQL file is the path handed to mustRun.
    good, work = utils.mustRun(None, mysqlZFile, force=FORCE)
    # `good` falsy: report failure; `work` falsy: report success.
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# In[5]:

# In[5]:

# Make sure both MySQL directories exist.
# `exist_ok=True` replaces the separate os.path.exists() test, which could
# race with another process creating the directory in between.
# Note: this raises FileExistsError if a path exists but is not a directory.
for path in (thisMysql, thisTempMysql):
    os.makedirs(path, exist_ok=True)

# # Collect
#