Example #1
0
class SpinCoat(BaseModel):
    """
    Model grouping the spin-coating step parameters of one full process.

    Holds two list fields: ``spds`` (a list of ``SpinSpd`` models) and
    ``times`` (a list of ``SpinTime`` models). Both sub-models are defined
    outside this excerpt.
    """
    # A ``solvent`` string field is currently disabled:
    #solvent = StringType(contextual=True)
    spds = ListType(ModelType(SpinSpd))
    times = ListType(ModelType(SpinTime))
Example #2
0
class Conv(BaseModel):
    """
    Model made of string fields plus a list of ``Selectivity`` sub-models.

    Every field is declared with ``contextual=True`` except ``convvalue`` and
    ``tofvalue``.

    NOTE(review): the field names suggest a catalytic-conversion record
    (temperature, conversion, flow, pressure, TOF, yield, selectivity) --
    confirm against the parser that fills this model.
    """
    tempvalue = StringType(contextual=True)
    tempunits = StringType(contextual=True)
    # Not flagged contextual, unlike its neighbours.
    convvalue = StringType()
    convtype = StringType(contextual=True)
    convunits = StringType(contextual=True)
    captioninfo = StringType(contextual=True)
    check = StringType(contextual=True)
    catname = StringType(contextual=True)
    captioncatname = StringType(contextual=True)
    comp = StringType(contextual=True)
    unit = StringType(contextual=True)
    flow = StringType(contextual=True)
    flowunit = StringType(contextual=True)
    footflow = StringType(contextual=True)
    footflowunit = StringType(contextual=True)
    pressure = StringType(contextual=True)
    presunits = StringType(contextual=True)
    # Not flagged contextual, unlike its neighbours.
    tofvalue = StringType()
    yieldtest = StringType(contextual=True)
    # List of ``Selectivity`` models; both the inner ModelType and the outer
    # ListType are flagged contextual.
    selectivity = ListType(ModelType(Selectivity, contextual=True),
                           contextual=True)
from chemdataextractor.model import BaseModel, StringType, ListType, ModelType, Compound
import re
from chemdataextractor.parse import R, I, W, Optional, merge, join, OneOrMore, Any, ZeroOrMore, Start
from chemdataextractor.parse.cem import chemical_name, chemical_label
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.parse.common import lrb, rrb, delim
from chemdataextractor.utils import first
from chemdataextractor.doc import Paragraph, Heading, Sentence
from lxml import etree

class CurieTemperature(BaseModel):
    """Model with three string fields: ``specifier``, ``value`` and ``units``."""
    specifier = StringType()
    value = StringType()
    units = StringType()

# Expose the model on ``Compound`` as a list field named ``curie_temperatures``.
Compound.curie_temperatures = ListType(ModelType(CurieTemperature))

#%% [markdown]
# Now define parse elements that describe how to identify the entities in text. Think of these as tagging processes.

#%%
# Define a very basic entity tagger
# Specifier: either the words "curie temperature", optionally followed by a
# bracket/delimiter, a symbol token (TC, Tc, TCurie, ...) and a closing
# bracket, or the symbol token on its own. The matched tokens are joined.
specifier = (
    I('curie') + I('temperature') + Optional(lrb | delim)
    + Optional(R(r'^T(C|c)(urie)?')) + Optional(rrb)
    | R(r'^T(C|c)(urie)?')
)('specifier').add_action(join)

# Units: a single token C, F or K, optionally followed by a full stop.
units = (R(r'^[CFK]\.?$'))('units').add_action(merge)

# Value: an integer, optionally followed by a decimal part introduced by
# either "." or ",". The previous pattern, (\.\,\d+)?, demanded the literal
# two-character sequence ".," and therefore rejected ordinary decimals such
# as "3.5". Raw strings are used so that \d and \. are not treated as
# (invalid) string escapes.
value = (R(r'^\d+([\.,]\d+)?$'))('value')

#%% [markdown]
# Note we tag each with a unique identifier that will be used later. Now let the entities in a sentence be any ordering of these (or whatever ordering you feel like). Here we specify that the value and units must coincide, but this does not have to be the case. 
# 
# We also define an extremely general parse phrase, this will be used to identify candidate sentences.
Example #4
0
# In[6]:

# Serialize the records held by ``d``.
# NOTE(review): ``d`` is created outside this excerpt; presumably a
# chemdataextractor Document -- confirm.
d.records.serialize()

# In[37]:

from chemdataextractor.model import BaseModel, StringType, ListType, ModelType


class Solubility(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()


# Expose the model on ``Compound`` as a list field named ``solubility``.
# NOTE(review): ``Compound`` is not imported in the visible lines of this
# cell; presumably it comes from an earlier cell -- confirm.
Compound.solubility = ListType(ModelType(Solubility))

# In[38]:

import re
from chemdataextractor.parse import R, I, W, Optional, merge

# Lead-in to a solubility value. All three parts are hidden:
#   1. the word "solubility";
#   2. optionally one of "=", "of", "was", "is", "at";
#   3. optionally "in the range" (with an optional trailing "of") or "about".
prefix = (
    (I(u'solubility')).hide()
    + Optional(
        W('=') | I('of') | I('was') | I('is') | I('at')).hide()
    + Optional(
        I('in') + I('the') + I('range') + Optional(I('of'))
        | I('about')).hide()
)

# Value: a token that is an integer or a decimal number with "." separator.
# Written as a raw string so that \d and \. are not invalid string escapes.
value = R(r'^\d+(\.\d+)?$')(u'value')
class Anneal(BaseModel):
    """
    Class for the full list of annealing step parameters for a full process.

    Holds two list fields: ``temps`` (a list of ``AnnealTemp`` models) and
    ``times`` (a list of ``AnnealTime`` models).
    """
    temps = ListType(ModelType(AnnealTemp))
    times = ListType(ModelType(AnnealTime))
    Class for each spin-coating time in a spin-coating process.
    """
    timevalue = StringType()
    timeunits = StringType(contextual=True)


class Anneal(BaseModel):
    """
    Class for the full list of annealing step parameters for a full process.

    Holds two list fields: ``temps`` (a list of ``AnnealTemp`` models) and
    ``times`` (a list of ``AnnealTime`` models).
    """
    temps = ListType(ModelType(AnnealTemp))
    times = ListType(ModelType(AnnealTime))


# Associating anneal parameters with a chemical object
Compound.anneal = ListType(ModelType(Anneal))  # currently not working

# Defining object parameters for the AnnealParser parser
# Delimiters: a single ; : , . or / token, hidden via .hide()
delim = R(r'^[;:,\./]$').hide()

# Defining formats for annealing temperature and units
tempprefix = (I('at') | I('or')).hide()
# A degree sign token followed by C, F or K (optional trailing full stop).
tempunits = (W('°') + R(r'^[CFK]\.?$'))('tempunits').add_action(merge)
# A token of two to four digits. The pattern is anchored at both ends, so
# the former lazy quantifier ({2,4}?) matched exactly the same tokens and
# has been dropped.
tempvalue = R(r'^\d{2,4}$')('tempvalue').add_action(merge) + Optional(delim)

# Defining formats for annealing time and time units
timeprefix = I('for').hide()
# Each alternative lists the accepted spellings explicitly. The previous
# patterns were built only from optional groups (e.g. ^m?(in|inute)?(s)?$),
# so they also accepted non-unit tokens such as "in", "r", "ou" or "ours".
timeunits = (
    R(r'^(s|secs?|seconds?)$') | R(r'^(m|mins?|minutes?)$')
    | R(r'^(h|hrs?|hours?)$'))('timeunits').add_action(join) + Optional(delim)
Example #7
0
from chemdataextractor.model import Compound, UvvisSpectrum, UvvisPeak, BaseModel, StringType, ListType, ModelType
from chemdataextractor.parse.common import hyphen,lbrct, dt, rbrct
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first

from chemdataextractor.parse.actions import strip_stop, merge, join
from chemdataextractor.parse.elements import W, I, T, R, Optional, ZeroOrMore, OneOrMore, Or, And, Not, Any
from chemdataextractor.parse.cem import chemical_name,cem, chemical_label, lenient_chemical_label, solvent_name
from chemdataextractor.doc import Paragraph, Sentence, Caption, Figure,Table, Heading
from chemdataextractor.doc.table import Table, Cell

class Ff(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()

# Expose the model on ``Compound`` as a list field named ``ff_pattern``.
Compound.ff_pattern = ListType(ModelType(Ff))

# Regex sources shared by the range expressions below. All regex literals in
# this section are raw strings so that \d, \w, \. etc. reach the regex engine
# unchanged instead of being (invalid) string escapes.
_NUMBER = r'^[\+\-–−]?\d+(\.\d+)?$'  # number with an optional leading sign
_SIGNED_NUMBER = r'^[\+\-–−]\d+(\.\d+)?$'  # number with a mandatory leading sign
_RANGE_SEPARATOR = r'^[\-–−~∼˜]$'  # a lone dash or tilde token

# Filler text between a prefix and a value; hidden.
common_text = R(r'(\w+)?\D(\D+)+(\w+)?').hide()
# Units: the "%" token or the word "percent".
units = (W(u'%') | I(u'percent'))(u'units')
# Value: a number pattern; note that it is not anchored.
value = R(r'\d+(\.\d+)?')(u'value')

# Hidden lead-ins: the abbreviation, the two words, or the hyphenated form.
abbrv_prefix = (I(u'FF') | I(u'ff')).hide()
words_pref = (I(u'fill') + I(u'factor')).hide()
hyphanated_pref = (I(u'fill') + I(u'-') + I('factor')).hide()

# Range written as one token, e.g. number-dash-number.
joined_range = R(r'^[\+\-–−]?\d+(\.\d+)?[\-–−~∼˜]\d+(\.\d+)?$')('value').add_action(merge)
# Range spread over several tokens: a number, optional (hidden) units, then
# either a separator token plus a number, or a number carrying its own sign.
spaced_range = (
    R(_NUMBER) + Optional(units).hide()
    + (R(_RANGE_SEPARATOR) + R(_NUMBER) | R(_SIGNED_NUMBER))
)('value').add_action(merge)
# Same shape, with the word "to" in place of the separator token.
to_range = (
    R(_NUMBER) + Optional(units).hide()
    + (I('to') + R(_NUMBER) | R(_SIGNED_NUMBER))
)('value').add_action(join)

prefix = Optional(I('a')).hide() + (Optional(lbrct) + W('FF') + Optional(rbrct) | I('fill') + Optional(I('factor'))
Example #8
0
    timevalue = StringType()
    timeunits = StringType(contextual=True)


class SpinCoat(BaseModel):
    """
    Model grouping the spin-coating step parameters of one full process.

    Holds two list fields: ``spds`` (a list of ``SpinSpd`` models) and
    ``times`` (a list of ``SpinTime`` models).
    """
    # A ``solvent`` string field is currently disabled:
    #solvent = StringType(contextual=True)
    spds = ListType(ModelType(SpinSpd))
    times = ListType(ModelType(SpinTime))


# Associate the spin-coating class with a given compound.  May be worth
# getting rid of for our eventual implementation, not yet sure.
Compound.spin_coat = ListType(ModelType(SpinCoat))

# Variable assignments
# Regex literals below are raw strings (or use an escaped backslash) so that
# \d, \. and \s are not treated as invalid string escapes.

# Delimiters: a single ; : , . or / token, hidden via .hide()
delim = R(r'^[;:,\./]$').hide()

# Defining formats for spin-coating value and units
# Units: "rpm" or "rcf" (each letter optionally followed by a full stop), or
# "g" optionally preceded by "x"/"×". The last pattern stays a u'' literal
# with an escaped backslash so the non-ASCII "×" remains a text character
# on Python 2 as well.
spdunits = (
    R(r'^r(\.)?p(\.)?m(\.)?$') | R(r'^r(\.)?c(\.)?f(\.)?$')
    | R(u'^([x×]?)(\\s?)?g$'))('spdunits').add_action(join) + ZeroOrMore(delim)
# Value: digits, an optional ",digits" group, ending in "00"; optionally
# wrapped in (hidden) parentheses.
spdvalue = Optional(
    W('(')).hide() + R(r'^\d+(,\d+)?[0][0]$')('spdvalue') + Optional(
        W(')')).hide()

# Defining formats for spin-coating time and time units
timeprefix = I('for').hide()
Example #9
0
    Heading(u'Synthesis of HKUST-1-AC'),
    Paragraph(
        u'The BET surface area and CO2 uptake capacity values for the HKUST-1–AC composite were 1381 m2 g−1 and 8.1 mmol g−1 (at 273 K and 1 bar), respectively, representing increases of 70% and 39%, respectively, over the reported values for HKUST-1'
    ))

from chemdataextractor.model import BaseModel, StringType, ListType, ModelType
import re
from chemdataextractor.parse import R, I, W, Optional, merge


class Capacity(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()


# Expose the model on ``Compound`` as a list field named ``capacity``.
Compound.capacity = ListType(ModelType(Capacity))

# Hidden lead-in: the word "capacity", or "CO2" followed by "uptake".
prefix = (I(u'capacity') | I(u'CO2') + I(u'uptake')).hide()

# Units "mmol g-1". W() compares against the text of ONE token, so the former
# W(u'mmol g-1') -- a literal containing a space -- could never match and the
# whole ``cp`` phrase was dead. The unit is now matched token by token.
# "g-1" is accepted either as a single token or split around the sign, and
# the sign may be an ASCII hyphen or U+2212 MINUS SIGN.
# NOTE(review): confirm which of the two forms the tokenizer actually emits.
# The optional trailing C/F/K token inherited from a temperature template is
# not part of this unit and has been removed.
_per_gram = R(u'^g[-−]1$') | W(u'g') + R(u'^[-−]$') + W(u'1')
units = (W(u'mmol') + _per_gram)(u'units').add_action(merge)

# Value: an integer or decimal token. Raw string, so \d and \. are not
# invalid string escapes.
value = R(r'^\d+(\.\d+)?$')(u'value')

# Full phrase: lead-in, value, units -- in that order, with nothing between.
cp = (prefix + value + units)(u'cp')

from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first


class CpParser(BaseParser):
    """
    Parser whose root parse element is ``cp``.

    NOTE(review): no ``interpret`` method is visible in this excerpt; the
    class may continue beyond it -- confirm.
    """
    root = cp
Example #10
0
from chemdataextractor.model import BaseModel, StringType, ListType, ModelType
from chemdataextractor.model import Compound


class BoilingPoint(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()


# Expose the model on ``Compound`` as a list field named ``boiling_points``.
Compound.boiling_points = ListType(ModelType(BoilingPoint))


class MeltingPoint(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()


# Expose the model on ``Compound`` as a list field named ``melting_points``.
Compound.melting_points = ListType(ModelType(MeltingPoint))
Example #11
0
from chemdataextractor.parse.common import hyphen
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first
from chemdataextractor.parse.actions import strip_stop
from chemdataextractor.parse.elements import W, I, T, R, Optional, ZeroOrMore, OneOrMore
from chemdataextractor.parse.cem import chemical_name
from chemdataextractor.doc import Paragraph, Sentence


# From my ipynb ...
class Pce(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()


# Expose the model on ``Compound`` as a list field named ``pce_pattern``.
# NOTE(review): ``BaseModel``, the field types and ``Compound`` are not
# imported in the visible lines of this snippet -- confirm they are in scope.
Compound.pce_pattern = ListType(ModelType(Pce))

# Hidden lead-ins: the abbreviation, the three words, or the form whose first
# two words arrive as the single token "power-conversion".
abbrv_prefix = (I(u'PCE') | I(u'PCEs') | I(u'pce')).hide()
words_pref = (I(u'power') + I(u'conversion') + I(u'efficiency')).hide()
hyphanated_pref = (I(u'power-conversion') + I(u'efficiency')).hide()
prefix = abbrv_prefix | words_pref | hyphanated_pref

# Regex literals are raw strings so that \w, \D, \d and \. reach the regex
# engine unchanged instead of being (invalid) string escapes.

# Filler text between the prefix and the value; hidden.
common_text = R(r'(\w+)?\D(\D+)+(\w+)?').hide()
# Units: the "%" token or the word "percent".
units = (W(u'%') | I(u'percent'))(u'units')
# Value: a number pattern, deliberately left unanchored; an anchored variant
# (^\d+(\.\d+)?$) was tried earlier and disabled.
value = R(r'\d+(\.\d+)?')(u'value')

# Prefix first: lead-in, any amount of filler, then value and units.
pce_first = (prefix + ZeroOrMore(common_text) + value + units)(u'pce')
# Value first: value and units immediately followed by the lead-in.
pce_second = (value + units + prefix)(u'pce')

pce_pattern = pce_first | pce_second
Example #12
0
from chemdataextractor.parse.common import hyphen,lbrct, dt, rbrct
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first

from chemdataextractor.parse.actions import strip_stop, merge, join
from chemdataextractor.parse.elements import W, I, T, R, Optional, ZeroOrMore, OneOrMore, Or, And, Not, Any
from chemdataextractor.parse.cem import chemical_name,cem, chemical_label, lenient_chemical_label, solvent_name
from chemdataextractor.doc import Paragraph, Sentence, Caption, Figure,Table, Heading
from chemdataextractor.doc.table import Table, Cell


class Eqe(BaseModel):
    """Model with two string fields, ``value`` and ``units``."""
    value = StringType()
    units = StringType()

# Expose the model on ``Compound`` as a list field named ``eqe_pattern``.
# NOTE(review): ``BaseModel``, the field types and ``Compound`` are not
# imported in the visible lines of this snippet -- confirm they are in scope.
Compound.eqe_pattern = ListType(ModelType(Eqe))

# Regex sources shared by the range expressions below. All regex literals in
# this section are raw strings so that \d, \w, \. etc. reach the regex engine
# unchanged instead of being (invalid) string escapes.
_NUMBER = r'^[\+\-–−]?\d+(\.\d+)?$'  # number with an optional leading sign
_SIGNED_NUMBER = r'^[\+\-–−]\d+(\.\d+)?$'  # number with a mandatory leading sign
_RANGE_SEPARATOR = r'^[\-–−~∼˜]$'  # a lone dash or tilde token

# Filler text between a prefix and a value; hidden.
common_text = R(r'(\w+)?\D(\D+)+(\w+)?').hide()
# Units: the "%" token or the word "percent".
units = (W(u'%') | I(u'percent'))(u'units')
# Value: a number pattern; note that it is not anchored.
value = R(r'\d+(\.\d+)?')(u'value')

# Hidden lead-ins: the abbreviation, the three words, or the hyphenated form.
abbrv_prefix = (I(u'EQE') | I(u'eqe')).hide()
words_pref = (I(u'external') + I(u'quantum') + I(u'efficiency')).hide()
hyphanated_pref = (I(u'external') + I(u'-') + I('quantum') + I(u'efficiency')).hide()

# Range written as one token, e.g. number-dash-number.
joined_range = R(r'^[\+\-–−]?\d+(\.\d+)?[\-–−~∼˜]\d+(\.\d+)?$')('value').add_action(merge)
# Range spread over several tokens: a number, optional (hidden) units, then
# either a separator token plus a number, or a number carrying its own sign.
spaced_range = (
    R(_NUMBER) + Optional(units).hide()
    + (R(_RANGE_SEPARATOR) + R(_NUMBER) | R(_SIGNED_NUMBER))
)('value').add_action(merge)
# Same shape, with the word "to" in place of the separator token.
to_range = (
    R(_NUMBER) + Optional(units).hide()
    + (I('to') + R(_NUMBER) | R(_SIGNED_NUMBER))
)('value').add_action(join)

prefix = Optional(I('a')).hide() + (Optional(lbrct) + W('EQE') + Optional(rbrct) | I('external') + Optional(I('quantum')) + Optional((I('efficiency')))
Example #13
0
class Comp(BaseModel):
    """Model with a list of strings (``values``) and a string ``unit``."""
    values = ListType(StringType())
    # Declared with contextual=True.
    unit = StringType(contextual=True)
Example #14
0
from chemdataextractor.model import BaseModel, StringType, ListType, ModelType, Compound
from chemdataextractor.parse import R, I, W, Optional, merge, ZeroOrMore, join, SkipTo
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.parse import ZeroOrMore, Any, OneOrMore, Start, End, Group, Not, T
from chemdataextractor.utils import first


# print("inside heading")
# print(etree.tostring(result))
# print()
class BET(BaseModel):
    """Model with two string fields, ``value`` and ``unit``."""
    value = StringType()
    # Declared with contextual=True.
    unit = StringType(contextual=True)


# Expose the model on ``Compound`` as a list field named ``bet``.
Compound.bet = ListType(ModelType(BET))


class Flow(BaseModel):
    """Model with two string fields, ``value`` and ``unit``."""
    value = StringType()
    # Declared with contextual=True.
    unit = StringType(contextual=True)


# Expose the model on ``Compound`` as a list field named ``flow``.
Compound.flow = ListType(ModelType(Flow))


class CAT(BaseModel):
    """Model with a single string field, ``name``."""
    name = StringType()


# Expose the model on ``Compound`` as a list field named ``catalyst_name``.
Compound.catalyst_name = ListType(ModelType(CAT))