# Derived locations: the MQL file in the temp source directory, the temp TF
# directory, and the versioned TF directory inside the repo.
mqlFile = '{}/{}.mql'.format(thisTempSource, CORE_NAME)
thisTempTf = '{}/tf'.format(thisTemp)
thisTf = '{}/tf/{}'.format(thisRepo, VERSION)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[4]:

if SCRIPT:
    # Build the path of the otype.tfx file once and reuse it, instead of
    # formatting the same expression a second time in the call below.
    testFile = '{}/.tf/otype.tfx'.format(thisTf)
    # mqlzFile is defined outside this excerpt.
    # utils.mustRun yields a (good, work) pair.
    (good, work) = utils.mustRun(mqlzFile, testFile, force=FORCE)
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # TF Settings
#
# We add some custom information here.
#
# * the MQL object type that corresponds to the TF slot type, typically `word`;
# * a piece of metadata that will go into every feature; the time will be added automatically
# * suitable text formats for the `otext` feature of TF.
#
# The oText feature is very sensitive to what is available in the source MQL.
# It needs to be configured here.
# We save the configs we need per source and version.
# Make sure the notes directory exists.
# exist_ok=True creates it when missing and does nothing when it is already
# there, without a separate existence check that could race with another
# process creating the same directory.
os.makedirs(thisNotes, exist_ok=True)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[6]:
# In[13]:

if SCRIPT:
    # No source file is passed (None); the file checked is the .tfx file
    # of the "sense" feature under the versioned TF directory.
    # utils.mustRun yields a (good, work) pair.
    (good, work) = utils.mustRun(
        None,
        "{}/.tf/{}.tfx".format(thisTf, "sense"),
        force=FORCE,
    )
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # Loading the feature data
#
# We load the features we need from the BHSA core database and from the valence module,
# as far as generated by the
# [enrich](https://github.com/ETCBC/valence/blob/master/programs/enrich.ipynb) notebook.

# In[7]:
# In[14]:
# Collect the non-empty keys of metaData in sorted order; these are the
# names of the new features. Also keep them as one space-separated string.
newFeatures = [m for m in metaData if m != '']
newFeatures.sort()
newFeaturesStr = ' '.join(newFeatures)

utils.caption(0, '{} languages ...'.format(len(newFeatures)))

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[16]:

if SCRIPT:
    # The first new feature (in sorted order) is the one whose .tfx file
    # is checked; no source file is passed (None).
    good, work = utils.mustRun(
        None,
        '{}/.tf/{}.tfx'.format(thisTf, newFeatures[0]),
        force=FORCE,
    )
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # Load existing data

# In[17]:

utils.caption(4, 'Loading relevant features')

# Load the `book` feature from the versioned TF directory and expose the
# resulting API names in this module's global namespace.
TF = Fabric(locations=thisTf, modules=[''])
api = TF.load('book')
api.makeAvailableIn(globals())

nodeFeatures = {}
# Versioned TF directory inside the repo.
thisTf = '{}/tf/{}'.format(thisRepo, VERSION)

# In[5]:

# Name of the feature whose .tfx file is checked in the test below.
testFeature = 'lex0'

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[6]:

if SCRIPT:
    # No source file is passed (None); utils.mustRun yields a (good, work) pair.
    good, work = utils.mustRun(
        None,
        '{}/.tf/{}.tfx'.format(thisTf, testFeature),
        force=FORCE,
    )
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# # TF Settings
#
# * a piece of metadata that will go into these features; the time will be added automatically
# * new text formats for the `otext` feature of TF, based on lexical features.
#
# We select the version specific otext material,
# falling back on a default if nothing appropriate has been specified in oText.
#
# We do not do this for the older versions `4` and `4b`.

# In[7]:
# In[3]:

# Gzipped SQL dump in the MySQL directory and the plain SQL file in the
# temp MySQL directory, both named after passageDb.
mysqlZFile = "{}/{}.sql.gz".format(thisMysql, passageDb)
mysqlFile = "{}/{}.sql".format(thisTempMysql, passageDb)

# # Test
#
# Check whether this conversion is needed in the first place.
# Only when run as a script.

# In[4]:
# In[4]:

if SCRIPT:
    # No source file is passed (None); the file checked is the gzipped dump.
    # utils.mustRun yields a (good, work) pair.
    (good, work) = utils.mustRun(None, mysqlZFile, force=FORCE)
    if not good:
        stop(good=False)
    if not work:
        stop(good=True)

# In[5]:
# In[5]:

# Make sure both output directories exist.
# exist_ok=True replaces the separate os.path.exists check, which could race
# with another process creating the same directory.
for path in (thisMysql, thisTempMysql):
    os.makedirs(path, exist_ok=True)

# # Collect
#