Example #1
0
 def cem_value_specifier(self):
     """Grammar for phrases ordered compound, value, specifier.

     Chains ``self.cem_phrase``, several optional hidden filler groups,
     ``self.value_phrase``, more optional hidden filler, and
     ``self.specifier_phrase``, and names the whole expression
     ``root_phrase``.

     Returns:
         The combined parser element tagged ``root_phrase``.
     """
     return (self.cem_phrase
             # Verb group such as "was found to". 'calculated' is accepted in
             # addition to the original 'calculate', and the repeated 'found'
             # alternative has been dropped.
             + Optional((I('is') | I('was') | I('were'))
                        + Optional(I('reported') | I('found') | I('calculate') | I('calculated')
                                   | I('measured') | I('shown'))
                        + Optional(I('to'))).hide()
             + Optional((I('exhibit') | I('exhibits') | I('exhibiting') | R('^show[s]*$') | I('demonstrates') | I('undergoes') | I('have') | I('has') | I('having') | I('determined') | I('with'))).hide()
             + Optional(I('the') | I('a') | I('an')).hide()
             + Optional(I('value') | I('values')).hide()
             + Optional(I('varies') + I('from')).hide()
             + Optional(W('=') | W('~') | W('≈') | W('≃') | I('was') | I('is') | I('at') | I('as') | I('near') | I('above') | I('below')).hide()
             + Optional(I('in') + I('the') + I('range') | I('ranging')).hide()
             # "+" binds tighter than "|", so the comparative needs its own
             # parentheses for both "higher than" and "lower than" to match.
             + Optional(I('of') | I('about') | I('from') | I('approximately') | I('around') | (I('high') + I('as')) | ((I('higher') | I('lower')) + I('than'))).hide()
             + self.value_phrase
             + Optional(I('as') | I('of') | I('for')).hide()
             + Optional(I('its') | I('their') | I('the')).hide() + self.specifier_phrase)('root_phrase')
Example #2
0
 def specifier_cem_value(self):
     """Assemble the grammar for phrases ordered specifier, compound, value.

     The pieces are appended one at a time, in reading order, and the
     finished expression is named ``root_phrase``.
     """
     # Leading determiner or "with".
     phrase = Optional(I('the') | I('a') | I('an') | I('its') | I('with')).hide()
     phrase = phrase + self.specifier_phrase
     phrase = phrase + Optional(I('of') | I('in') | I('for')).hide()

     # Qualifier or determiner in front of the compound.
     phrase = phrase + Optional(
         I('bulk') | I('powdered') | I('doped') | I('the') | I('a') | I('an')
         | I('these') | I('those') | I('this') | I('that')).hide()
     phrase = phrase + self.cem_phrase

     # Linking verb, or "can be assigned" with an optional "at"/"to".
     phrase = phrase + Optional(
         I('is') | I('was') | I('were') | I('occurs') | I('of')
         | (I('can') + I('be') + I('assigned') + Optional(I('at') | I('to')))).hide()
     phrase = phrase + Optional(
         I('observed') | I('determined') | I('measured') | I('calculated') | I('found')).hide()

     # Range or comparison wording, one alternative per line.
     phrase = phrase + Optional(
         I('in') + I('the') + I('range') + I('of')
         | I('ranging') + I('from')
         | I('as')
         | I('to') + I('be')
         | I('about')
         | I('over')
         | (I('higher') | I('lower')) + I('than')
         | I('above')).hide()
     phrase = phrase + Optional(
         W('=') | W('~') | W('≈') | W('≃') | I('of') | I('was') | I('is')
         | I('at') | I('as') | I('near') | I('above') | I('below')).hide()

     # The value, optionally bracketed; only the opening bracket is hidden.
     phrase = phrase + Optional(lbrct).hide()
     phrase = phrase + (self.value_phrase)
     phrase = phrase + Optional(rbrct)
     return phrase('root_phrase')
Example #3
0
 def value_phrase(self):
     """Build the grammar that recognises a numeric value or a range of values.

     Alternatives are tried in the order imaginary value, range, single
     value. The match may be wrapped in optional hidden brackets and must
     not be followed by one of the listed unit or word tokens.

     All regular expressions are raw strings so that sequences such as
     ``\\d`` and ``\\.`` reach the regex engine without relying on Python's
     handling of unknown string escapes.

     Returns:
         The parser element for a value.
     """
     number = R(r'^[\+\-–−]?\d+(\.\d+)?$')
     # Range written as a single token, e.g. two numbers joined by a dash.
     joined_range = R(r'^[\+\-–−]?\d+(\.\d+)?[\-–−~∼˜]\d+(\.\d+)?$')(
         'raw_value').add_action(merge)
     # Range spread over several tokens: number, optional separator, number.
     spaced_range = (number + (R(r'^[\-–−~∼˜]$') + number
                               | number))('raw_value').add_action(merge)
     to_range = (number + I('to') + number)('raw_value').add_action(join)
     # NOTE(review): tagged 'value' while every sibling uses 'raw_value' --
     # confirm this is intentional.
     plusminus_range = (number + R('±') + number)('value').add_action(join)
     value_range = (Optional(R(r'^[\-–−]$')) +
                    (plusminus_range | joined_range | spaced_range
                     | to_range))('raw_value').add_action(merge)
     value_single = (Optional(R(r'^[~∼˜\<\>]$')) + Optional(R(r'^[\-–−]$')) +
                     number)('raw_value').add_action(merge)
     # NOTE(review): unlike the patterns above, the imaginary-number patterns
     # below are not anchored on both ends -- confirm that is intended.
     inumber = (R(r'\d*\.?\d*[i]$')).add_action(join)
     # inumber = R('^([-+]?(\d+\.?\d*|\d*\.?\d+)([Ee][-+]?[0-2]?\d{1,2})?[r]?|[-+]?((\d+\.?\d*|\d*\.?\d+)([Ee][-+]?[0-2]?\d{1,2})?)?[i]|[-+]?(\d+\.?\d*|\d*\.?\d+)([Ee][-+]?[0-2]?\d{1,2})?[r]?[-+]((\d+\.?\d*|\d*\.?\d+)([Ee][-+]?[0-2]?\d{1,2})?)?[i])$')
     ivalue = (R(r'\d*\.?\d*$') + R(r'^[\+\-–−]?') + inumber).add_action(join)
     # Negative lookahead: reject the number when one of these tokens follows.
     value = Optional(lbrct).hide() + (
         ivalue | value_range | value_single)('raw_value') + Not(
             I('wt%') | I('vol%') | I('K') | I('times') | I('GPa') | I('wt')
             | I('vol') | I('%') | I('nm') | I('zF') | W('°') | W('KV')
             | W('kV') | W('MV') | I('kHz') | I('Hz') | I('GHz') | W('V')
             | W('J') | W('eV') | I('MHz')) + Optional(rbrct).hide()
     return value
Example #4
0
 def prefix(self):
     """Specifier followed by the filler wording that leads up to a value.

     Starts from ``self.specifier_phrase`` and appends every filler element
     below, in order, with ``+``; the ``join`` action is attached to the
     finished expression. Elements without ``.hide()`` are left visible.
     """
     phrase = self.specifier_phrase
     bracket_or_delim = lbrct | delim | rbrct

     fillers = [
         Optional(I('values')).hide(),
         Optional(delim).hide(),
         Optional((I('varies') + I('from'))
                  | R('^increase(s|d)?') | I('falls') | I('reaches')).hide(),
         Optional(I('steeply')).hide(),
         Optional(I('recorded') | I('reported')).hide(),
         Optional(I('of') | I('was') | I('is') | I('at') | I('near')
                  | I('above') | I('below') | I('with') | I('to')
                  | I('were') | I('a')).hide(),
         Optional(I('reported') | I('determined')
                  | I('estimated') | I('found') | I('occurs')).hide(),
         Optional(I('temperatures')).hide(),
         Optional(I('as') | (I('to') + I('be'))).hide(),
         Optional(I('in') + I('the') + I('range')).hide(),
         Optional(I('as') + I('high') + I('as')),  # not hidden
         Optional(I('ranging') + I('from')).hide(),
         Optional(I('of')).hide(),
         Optional(I('rather') | I('quite')).hide(),
         Optional(I('high') | I('low') | I('maximum') | I('minimum')).hide(),
         Optional(I('the')).hide(),
         Optional(delim | lbrct | rbrct),  # not hidden
         Optional(
             I('of') | I('about') | I('approximately') | I('typically')
             | I('ca.') | I('around') | I('at') | I('above') | I('below')
             | I('high') | I('low')
             | ((I('higher') | I('lower') | I('more') | I('less')) + I('than'))
             | I('order')
             | (I('for') + I('instance'))
             | (I('up') + I('to'))
             | I('reaching')
             | I('value')).hide(),
         Optional(I('a') | I('an') | I('as')).hide(),
         Optional(I('maximum')).hide(),
         Optional(I('of')).hide(),
         ZeroOrMore(bracket_or_delim),  # not hidden
         Optional(self.specifier_phrase),  # not hidden
         Optional(I('of')).hide(),
         Optional(I('the')).hide(),
         Optional(I('order')).hide(),
         Optional((I('up') | I('equal')) + I('to')).hide(),
         Optional(I('of')).hide(),
         ZeroOrMore(lbrct | delim | rbrct),  # not hidden
         Optional(W('=') | W('~') | W('≈')
                  | W('≃') | W('>') | W('<')).hide(),
         ZeroOrMore(lbrct | delim | rbrct).hide(),
     ]

     # Fold left to right so the nesting matches a plain a + b + c chain.
     for filler in fillers:
         phrase = phrase + filler
     return phrase.add_action(join)
Example #5
0
class Solubility(BaseModel):
    """Model with two string fields describing a solubility record."""
    # Amount, stored as text.
    value = StringType()
    # Unit of the amount, stored as text.
    units = StringType()


# Attach a list-of-Solubility field to Compound under the name ``solubility``.
# NOTE(review): Compound is not imported in the visible part of this file --
# confirm it is in scope before this line runs.
Compound.solubility = ListType(ModelType(Solubility))

# In[38]:

import re
from chemdataextractor.parse import R, I, W, Optional, merge

# prefix = (R(u'^m\.?p\.?$', re.I) | I(u'melting') + I(u'point')).hide()
# Trigger: the word "solubility", then optional linking words and optional
# "in the range (of)" / "about". Every part is marked hidden.
prefix = (I(u'solubility')).hide() + Optional(
    W('=') | I('of') | I('was') | I('is') | I('at')).hide() + Optional(
        I('in') + I('the') + I('range') + Optional(I('of'))
        | I('about')).hide()

# delim = R(u'^[:;\.,]$')
# Integer or decimal number. Raw string, so the regex escapes are not
# treated as (invalid) Python string escapes.
value = R(r'^\d+(\.\d+)?$')(u'value')

units = (W(u'nM') | W(u'μM') | W(u'mM') | W(u'μg')
         | W(u'mg'))(u'units').add_action(merge)

# NOTE(review): the hidden filler pattern requires whitespace between two
# words -- confirm a single token can ever contain whitespace.
so = (prefix + Optional(R(r'\w+\s\w+')).hide() + value + units)(u'so')

# units = (W(u'°') + Optional(R(u'^[CFK]\.?$')))(u'units').add_action(merge)
# value = R(u'^\d+(\.\d+)?$')(u'value')
# bp = (prefix + + value + units)(u'bp')
    Class for full list of spin-coating step parameters for full process.
    """
    temps = ListType(ModelType(AnnealTemp))
    times = ListType(ModelType(AnnealTime))


# Associating anneal parameters with a chemical object
Compound.anneal = ListType(ModelType(Anneal))  # currently not working

# Defining object parameters for the AnnealParser parser
# Delimiters (hidden). Regex patterns below are raw strings so that "\." and
# "\d" are not treated as invalid Python string escapes.
delim = R(r'^[;:,\./]$').hide()

# Defining formats for annealing temperature and units
tempprefix = (I('at') | I('or')).hide()
# Degree sign followed by C, F or K with an optional trailing period.
tempunits = (W('°') + R(r'^[CFK]\.?$'))('tempunits').add_action(merge)
# Two to four digits, optionally followed by a delimiter.
tempvalue = R(r'^\d{2,4}?$')('tempvalue').add_action(merge) + Optional(delim)

# Defining formats for annealing time and time units
timeprefix = I('for').hide()
# NOTE(review): every part of these unit patterns is optional, so they also
# accept fragments such as "ec" or "ours" -- confirm that is acceptable.
timeunits = (
    R(r'^s?(ec|econds)?$') | R(r'^m?(in|inute)?(s)?$')
    | R(r'^h?(ou)?(r)?(s)?$'))('timeunits').add_action(join) + Optional(delim)
# At most two digits, optionally followed by a delimiter.
timevalue = R(r'^\d{,2}$')('timevalue') + Optional(delim)

# Putting everything together
temp = (tempvalue)('temp')
# One temperature, then any number of further temperatures separated by
# hidden prefixes, units, delimiters or "and".
temps = (temp + ZeroOrMore(
    ZeroOrMore(tempprefix | tempunits | delim | W('and')).hide() +
    temp))('temps')
time = (timevalue)('time')
Example #7
0
class Obstructions:
    """Optional filler tokens: opening bracket, tilde, the word "of", hyphen.

    Each attribute is an ``Optional`` element; ``all`` chains the four in
    that order. Regex patterns are raw strings so the backslash escapes are
    handled by the regex engine rather than by Python string parsing.
    """
    bracket = Optional(R(r'\('))
    curlLine = Optional(R(r'\~'))
    of = Optional(W(u'of'))
    hyphen = Optional(R(r'\-'))
    # Shares its name with the builtin ``all``; kept because it is part of
    # the class's public attributes.
    all = bracket + curlLine + of + hyphen
Example #8
0
class BpRegex:
    """Grammar pieces for a boiling-point mention: trigger, value and units."""
    # "bp", "b.p." (case-insensitive) or the words "boiling point"; hidden.
    # Regexes are raw strings so "\." and "\d" are not treated as invalid
    # Python string escapes.
    prefix = (R(r'^b\.?p\.?$', re.I)
              | I(u'boiling') + I(u'point')).hide()
    # Degree sign, optionally followed by C, F or K with an optional period.
    units = (W(u'°') + Optional(R(r'^[CFK]\.?$')))(u'units').add_action(merge)
    # Integer or decimal number.
    value = R(r'^\d+(\.\d+)?$')(u'value')
    bp = (prefix + value + units)(u'bp')
#from chemdataextractor.doc import Sentence
from chemdataextractor.parse import R, I, W, Optional, merge, join
from chemdataextractor.parse.base import BaseSentenceParser
from chemdataextractor.utils import first

# A decimal or integer number followed by a frequency unit; the tokens are
# combined by ``join`` and named 'frequencyvalue'. Raw strings keep "\d" and
# "\." from being treated as invalid Python string escapes.
frequency_value = ((R(r"^\d+?\.\d+?$") | R(r"^\d+?$")) +
                   (W('kHz') | W('MHz') | W('GHz')
                    | W('Hz')))('frequencyvalue').add_action(join)


class DielectricConstantFrequencyParser(BaseSentenceParser):
    """Sentence parser rooted at ``frequency_value``."""

    root = frequency_value

    def interpret(self, result, start, end):
        """Yield one ``self.model`` record built from the matched text.

        ``start`` and ``end`` are accepted but not used here.
        """
        matched_text = result.xpath('//frequencyvalue/text()')
        yield self.model(frequency=first(matched_text))
Example #10
0


# Associate the spin-coating class with a given compound.  May be worth
# getting rid of for our eventual implementation, not yet sure.
Compound.spin_coat = ListType(ModelType(SpinCoat))

# Variable assignments
# Delimiters (hidden). Regex patterns below are raw strings so that "\.",
# "\d" and "\s" are not treated as invalid Python string escapes.
delim = R(r'^[;:,\./]$').hide()

# Defining formats for spin-coating value and units
# Units: "rpm" or "rcf" with optional periods, or "g" optionally preceded by
# "x"/"×"; followed by any number of delimiters.
spdunits = (
    R(r'^r(\.)?p(\.)?m(\.)?$') | R(r'^r(\.)?c(\.)?f(\.)?$')
    | R(r'^([x×]?)(\s?)?g$'))('spdunits').add_action(join) + ZeroOrMore(delim)
# Value: digits with an optional comma group, ending in "00"; surrounding
# parentheses are optional and hidden.
spdvalue = Optional(
    W('(')).hide() + R(r'^\d+(,\d+)?[0][0]$')('spdvalue') + Optional(
        W(')')).hide()

# Defining formats for spin-coating time and time units
timeprefix = I('for').hide()
# NOTE(review): every part of these unit patterns is optional, so they also
# accept fragments such as "ec" or "ours" -- confirm that is acceptable.
timeunits = (
    R(r'^s?(ec|econds)?$') | R(r'^m?(in|inute)?(s)?$')
    | R(r'^h?(ou)?(r)?(s)?$'))('timeunits').add_action(join) + Optional(delim)
timevalue = R(r'^\d{,3}$')('timevalue') + Optional(delim)  # up to 3 digits

# Putting everything together
spdprefix = I('at').hide()
spd = (spdvalue)('spd')
# One speed, then any number of further speeds separated by hidden units,
# delimiters or "and".
spds = (
    spd +
    ZeroOrMore(ZeroOrMore(spdunits | delim | W('and')).hide() + spd))('spds')
Example #11
0
from chemdataextractor.model import BaseModel, StringType, ListType, ModelType
import re
from chemdataextractor.parse import R, I, W, Optional, merge


class Capacity(BaseModel):
    """Model with two string fields describing a capacity record."""
    # Amount, stored as text.
    value = StringType()
    # Unit of the amount, stored as text.
    units = StringType()


# Attach a list-of-Capacity field to Compound under the name ``capacity``.
# NOTE(review): Compound is not among the names imported at the top of this
# snippet -- confirm it is in scope before this line runs.
Compound.capacity = ListType(ModelType(Capacity))

# Trigger: "capacity", or "CO2" followed by "uptake"; hidden.
prefix = (I(u'capacity') | I(u'CO2') + I(u'uptake')).hide()
# The original author kept the Optional(...) part only because removing it
# caused a syntax error elsewhere in the file.
# NOTE(review): the [CFK] pattern looks like a leftover from a temperature
# grammar, and W(u'mmol g-1') contains a space -- confirm a single token can
# ever equal that text.
units = (W(u'mmol g-1') +
         Optional(R(r'^[CFK]\.?$')))(u'units').add_action(merge)
# Integer or decimal number. Raw string, so the regex escapes are not
# treated as invalid Python string escapes.
value = R(r'^\d+(\.\d+)?$')(u'value')

cp = (prefix + value + units)(u'cp')

from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first


class CpParser(BaseParser):
    root = cp

    def interpret(self, result, start, end):
        compound = Compound(capacity=[
            Capacity(value=first(result.xpath('./value/text()')),
Example #12
0
            context["footflowunit"] = flowunit
        if context:
            # print(context)
            c.conv.append(Conv(**context))
        yield c


# NOTE(review): none of these alternatives is anchored, and "m" alone is one
# of them -- confirm how broadly this is meant to match.
unit = R("mass|wt|weight|mol|mole|m|vol")(u"value")

# NOTE(review): "$composition" begins with the end-of-string anchor, so it
# looks unmatchable; "^composition" may have been intended -- confirm.
compname = (R("$composition", re.IGNORECASE))(u"phrase")
feedname = (R("mixture", re.IGNORECASE))(u"phrase")
# Single definition: an earlier "condition" pattern bound to the same name
# was immediately overwritten by this one, so it has been dropped.
conditionname = R("conditions?", re.IGNORECASE)(u"phrase")
compprefix = (compname | feedname)(u"prefix")

units = (unit + (R(u'%') | W(u"percent")))(u'compunits')
# One or two digits with an optional single decimal place. Raw string, so
# "\d" and "\." are not treated as invalid Python string escapes.
value = (R(r"^\d{1,2}(\.\d)?$"))(u'compvalue')

# NOTE(review): R("H2") is listed before R("H2O") and neither is anchored --
# confirm which one should win on the text "H2O".
name = (R("H2") | R("H2O") | R("CO") | R("N2") | R("He"))(u"compname")
# comppre = (compprefix + OneOrMore(SkipTo(name) + name + SkipTo(value) + value + Optional(units)))("phrase")
# One or more items, each a value, a name, or whatever Any() accepts.
precomp = (OneOrMore(value | name | Any()))(u"phrase")

comp = (precomp)('comp')


class FootCompParser(BaseParser):
    """Parser whose root grammar is the module-level ``comp`` expression."""
    root = comp

    def __init__(self):
        # No-op constructor.
        # NOTE(review): this does not call BaseParser.__init__ -- confirm the
        # base class needs no setup.
        pass
Example #13
0
? Matches either once or zero times; marks the p as optional
$ Matches at the end of a line
I means the case is not sensitive
| matches either b.p. or boiling point
.hide() I think just makes it so the prefix does not show up in the output but
it still searches the document for the text
'''
prefix = (R(r'^b\.?p\.?$', re.I) | I(u'boiling') + I(u'point')).hide()

# Units grammar:
#   W(u'°')        matches the degree symbol exactly.
#   Optional(...)  means the scale letter is included only if it is present.
#   R(...)         matches the token text against a regular expression.
#   ^[CFK]\.?$     ^ and $ anchor the start and end of the token, [CFK] is
#                  exactly one of C, F or K, and \.? is an optional period.
# The regex is a raw string so the backslash reaches the regex engine instead
# of being read as an (invalid) Python string escape.
units = (W(u'°') + Optional(R(r'^[CFK]\.?$')))(u'units').add_action(merge)

# Value grammar, ^\d+(\.\d+)?$ :
#   ^ / $     anchor the start and end of the token.
#   \d+       one or more digits (0-9).
#   (\.\d+)?  a group made of a period followed by one or more digits; the
#             trailing ? makes the whole group optional.
value = R(r'^\d+(\.\d+)?$')(u'value')

bp = (prefix + value + units)(u'bp')

from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first
Example #14
0
 def connection(self):
     """Return the grammar for a linking word or phrase, with ``join`` attached."""
     single_words = I('at') | W('=') | I('of') | I('about') | I('for')
     as_regards = I('as') + I('regards')
     attributed_to = I('attributed') + I('to')
     linker = single_words | as_regards | attributed_to | I('concerning')
     return linker.add_action(join)
Example #15
0
            values = [(value[i], names[i]) for i, j in enumerate(value)]
            context["values"] = values
        elif value and len(value) == 1 and len(names) < 1:
            context["values"] = value

        # print("table comp")
        print(context)
        if context:
            c.comp.append(Comp(**context))
        yield c


# Three case-insensitive word stems combined into one hidden alternation.
_calcin_stem = R("calcin", re.IGNORECASE)
_thermo_stem = R("thermo", re.IGNORECASE)
_equi_stem = R("equi", re.IGNORECASE)
no = (_calcin_stem | _thermo_stem | _equi_stem).hide()

# Heading trigger: text starting with "temp" (any case), a "T" followed by a
# character other than O or P, or a bare "T".
_temp_word = R("^temp", re.IGNORECASE)
_t_then_other = R("^T[^OP]")
_bare_t = R("^T$")
prefix = (_temp_word | _t_then_other | _bare_t)("prefix")

# Optional degree sign followed by a pattern containing C, F or K.
_degree_sign = Optional(W(u'°'))
_scale_letter = R(u'[CFK]')
units = (_degree_sign + _scale_letter)(u'units').add_action(merge)

# NOTE(review): Not(no) is wrapped in Optional, so it may never actually
# reject anything -- confirm whether a plain Not(no) was intended.
_up_to_prefix = Optional(Not(no)) + SkipTo(prefix) + prefix
_trailing_units = Optional(SkipTo(units) + units)
temp = (_up_to_prefix + _trailing_units)(u"temp")

temphead = (temp)(u"tempphrase")


class TempHeadingParser(BaseParser):
    root = temphead

    def interpret(self, result, start, end):
        """"""
        # print("inside temp")
        # print(etree.tostring(result))
        # print()
        # print(lollol)
#from chemdataextractor.doc import Sentence
from chemdataextractor.parse import R, I, W, Optional, merge, join
from chemdataextractor.parse.base import BaseSentenceParser
from chemdataextractor.utils import first

# An integer followed by "nm", or a decimal followed by "μm" or "nm"; the
# tokens are combined by ``join`` and named 'wavelengthvalue'. Raw strings
# keep "\d" and "\." from being treated as invalid Python string escapes.
# NOTE(review): an integer followed by "μm" is not covered -- confirm that
# is intended.
wavelength_value = (
    (R(r"^\d+?$") + W('nm')) | (R(r"^\d+?\.\d+?$") + W('μm')) |
    (R(r"^\d+?\.\d+?$") + W('nm')))('wavelengthvalue').add_action(join)


class RefractiveIndexWavelengthParser(BaseSentenceParser):
    """Sentence parser rooted at ``wavelength_value``."""

    root = wavelength_value

    def interpret(self, result, start, end):
        """Yield a ``self.model`` record carrying the matched wavelength text.

        ``start`` and ``end`` are accepted but not used here.
        """
        yield self.model(
            wavelength=first(result.xpath('//wavelengthvalue/text()')))