Example #1
0
    def _build_taggers(self):
        behind, ahead = self.looks.to_rex()
        self._taggers = {}

        for name, version_rexes in self.versions.iteritems():
            self._taggers[name] = []
            if not isinstance(version_rexes, list):
                version_rexes = [version_rexes]
            for rex in self.rexes:
                for version_rex in version_rexes:
                    tagger = rex + self.separator + behind + version_rex + ahead
                    if self.compile_rex:
                        tagger = re.compile(tagger, self.flags)
                    self._taggers[name].append(tagger)
Example #2
0
    def _build_taggers(self):
        """Build a list of regular expressions, each containing a group
        named "version"; store in self._taggers

        """
        behind, ahead = self.looks.to_rex()
        self._taggers = [
            rex + self.separator + behind + self.arbitrary_rex + ahead
            for rex in self.rexes
        ]
        if self.compile_rex:
            self._taggers = [
                re.compile(rex, self.flags) for rex in self._taggers
            ]
Example #3
0
    def _build_taggers(self):
        behind, ahead = self.looks.to_rex()
        self._taggers = {}

        for name, version_rexes in self.versions.iteritems():
            self._taggers[name] = []
            if not isinstance(version_rexes, list):
                version_rexes = [version_rexes]
            for rex in self.rexes:
                for version_rex in version_rexes:
                    tagger = rex + self.separator + behind + version_rex + ahead
                    if self.compile_rex:
                        tagger = re.compile(tagger, self.flags)
                    self._taggers[name].append(tagger)
Example #4
0
    def _build_taggers(self):
        """Build a list of regular expressions, each containing a group
        named "version"; store in self._taggers

        """
        behind, ahead = self.looks.to_rex()
        self._taggers = [
            rex + self.separator + behind + self.arbitrary_rex + ahead
            for rex in self.rexes
        ]
        if self.compile_rex:
            self._taggers = [
                re.compile(rex, self.flags)
                for rex in self._taggers
            ]
Example #5
0
def fetch_versions():
    """Fetch version information from Wikipedia.

    Downloads the MATLAB article, locates the first "wikitable" table after
    the element with id "Release_history", and passes every table row to
    parse_version_row. Rows for which it returns None are dropped.

    :return: Dictionary mapping version numbers to lists of version labels
    """
    page = requests.get('http://en.wikipedia.org/wiki/MATLAB')
    soup = BeautifulSoup(page.content)

    headline = soup.find(id='Release_history')
    table = headline.find_next('table', class_=re.compile(r'wikitable'))

    parsed_rows = (parse_version_row(row) for row in table.find_all('tr'))
    return dict(item for item in parsed_rows if item is not None)
Example #6
0
)

# "spgr" with a non-word character on each side
spgr = RexTagger('spgr', [r'\Wspgr\W'])

### Trajectories ###

# "echo planar" joined by the shared delimiter pattern, or the literal
# "EPI" supplied as an already-compiled pattern
epi = RexTagger('epi', [
    r'echo{dlm}planar'.format(dlm=delimiter),
    re.compile(r'EPI'),
])

# "spiral in" / "spiral out"
spiral = RexTagger('spiral', [
    r'spiral{dlm}in'.format(dlm=delimiter),
    r'spiral{dlm}out'.format(dlm=delimiter),
])

spiral_context = MultiRexTagger(
    'spiral',
    [
        r'spiral',
Example #7
0
    ]
)

# Tagger for realignment / motion correction. The patterns are built from
# the module-level delimiter, mov_ptn and cor_ptn fragments.
realign = RexTagger(
    'realign',
    [
        r'realign',
        r'{mov}{dlm}correct'.format(
            mov=mov_ptn,
            dlm=delimiter
        ),
        r'(motion|movement|translation|rotation){dlm}parameter'.format(
            dlm=delimiter,
        ),
        r'automat(ed|ic){dlm}image{dlm}registration'.format(dlm=delimiter),
        # Raw string: "\W" is not a valid string escape, so the non-raw
        # literal only worked by accident and triggers an invalid-escape
        # warning on current Python versions. The pattern text is unchanged.
        re.compile(r'\WAIR\W'),
        r'{cor}{dlm}(for)?{dlm}(bulk|whole|participants?|subjects?)?{dlm}(head)?{dlm}{mov}'.format(
            cor=cor_ptn,
            dlm=delimiter,
            mov=mov_ptn,
        ),
        # "{{,25}}" is an escaped brace pair: after .format() it becomes the
        # regex quantifier {,25}
        r'{cor}{dlm}(for)?.{{,25}}and{dlm}(bulk|whole|participants?|subjects?)?{dlm}(head)?{dlm}{mov}'.format(
            cor=cor_ptn,
            dlm=delimiter,
            mov=mov_ptn,
        ),
        r'mcflirt',
        r'3dvolreg',
    ]
)
Example #8
0
    r'small{dlm}volume{dlm}correction'.format(dlm=delimiter),
    r'\Wsvc\W',
])

# Tag AlphaSim separately from general Monte Carlo methods to identify
# incorrect use when applied without smoothness estimation
alphasim = RexTagger(
    'alphasim',
    [
        r'alpha{dlm}sim'.format(dlm=delimiter),
        r'clustsim',
    ]
)

# NOTE(review): the two lists are presumably primary and accompanying
# patterns that must co-occur -- confirm against MultiRexTagger
alphasim_context = MultiRexTagger(
    'alphasim',
    [
        r'monte{dlm}carlo'.format(dlm=delimiter),
    ],
    [
        r'rest{dlm}fmri'.format(dlm=delimiter),
        re.compile(r'AFNI'),
        re.compile(r'REST'),
    ]
)

# Same patterns as alphasim plus "monte carlo correct"
monte = RexTagger(
    'monte',
    [
        r'alpha{dlm}sim'.format(dlm=delimiter),
        r'clustsim',
        r'monte{dlm}carlo{dlm}correct'.format(dlm=delimiter),
    ]
)

monte_context = MultiRexTagger('monte', [
    r'monte{dlm}carlo'.format(dlm=delimiter),
], [
    r'multiple{dlm}comparison'.format(dlm=delimiter),
    r'rest{dlm}fmri'.format(dlm=delimiter),
    r'threshold',
Example #9
0
# Full toolkit name, author attribution, or one of the project URLs
rest = RexTagger('rest', [
    r'resting{dlm}state{dlm}fmri{dlm}data{dlm}analysis{dlm}toolkit'.format(dlm=delimiter),
    r'rest{dlm}(by)?{dlm}song{dlm}xiao'.format(dlm=delimiter),
    r'resting\-fmri\.sourceforge\.net',
    r'sourceforge\.net/projects/resting\-fmri',
    r'restfmri\.net',
])

# Pre-compiled "AAL" between non-word characters, or the spelled-out name
aal = RexTagger('aal', [
    re.compile(r'\WAAL\W'),
    r'automatic{dlm}anatomic(al)?{dlm}label'.format(dlm=delimiter),
])

# "snpm" between non-word characters, or the spelled-out name
snpm = RexTagger('snpm', [
    r'\Wsnpm\W',
    r'statistical{dlm}non{dlm}parametric{dlm}mapping'.format(dlm=delimiter),
])

spmd = RexTagger(
    'spmd',
        [
Example #10
0
import functools

from neurotrends.config import re

# Type of a compiled pattern object, obtained by compiling an empty pattern
REX_TYPE = type(re.compile(''))
# Default flags: the re module's IGNORECASE and VERBOSE flags combined
DEF_FLAGS = re.IGNORECASE | re.VERBOSE
# NOTE(review): presumably a character-count limit used elsewhere -- confirm
NCHAR = 50
# NOTE(review): presumably the placeholder for an unidentified version --
# confirm against callers
UNKNOWN_VERSION = '?'


def rex_wrap(rex, wrap=''):
    """Enclose a pattern in a regex group.

    :param rex: Pattern text to enclose
    :param wrap: Text placed right after the opening parenthesis, e.g.
        '?:'; the default '' yields a plain capturing group
    :return: Unicode string of the form '(' + wrap + rex + ')'
    """
    # The template contains no backslashes, so a plain u'' literal has the
    # same value as the former ur'' literal. Unlike ur'', it is also valid
    # syntax on Python 3.
    return u'({wrap}{rex})'.format(rex=rex, wrap=wrap)


def rex_named(rex, name):
    """Wrap rex in a named capturing group called name (via rex_wrap)."""
    group_prefix = '?P<{}>'.format(name)
    return rex_wrap(rex, group_prefix)


# Shortcuts for rex_wrap with the group prefix fixed in advance
rex_noncap = functools.partial(rex_wrap, wrap='?:')  # (?:...) non-capturing group
rex_posbehind = functools.partial(rex_wrap, wrap='?<=')  # (?<=...) positive look-behind
rex_negbehind = functools.partial(rex_wrap, wrap='?<!')  # (?<!...) negative look-behind
rex_posahead = functools.partial(rex_wrap, wrap='?=')  # (?=...) positive look-ahead
rex_negahead = functools.partial(rex_wrap, wrap='?!')  # (?!...) negative look-ahead


# TODO: Only keep earliest tags
def uextend(base, vals):
    """Append to base, in order, each item of vals not already in base.

    base is modified in place.
    """
    for candidate in vals:
        if candidate in base:
            continue
        base.append(candidate)
Example #11
0
### Sequences ###

# "mp rage" (delimiter-joined) between non-word characters
mprage = RexTagger(
    'mprage',
    [
        r'\Wmp{dlm}rage\W'.format(dlm=delimiter),
    ]
)

# "spgr" between non-word characters
spgr = RexTagger(
    'spgr',
    [
        r'\Wspgr\W',
    ]
)

### Trajectories ###

# "echo planar", or the literal "EPI" as an already-compiled pattern
epi = RexTagger(
    'epi',
    [
        r'echo{dlm}planar'.format(dlm=delimiter),
        re.compile(r'EPI'),
    ]
)

# "spiral in" / "spiral out"
spiral = RexTagger(
    'spiral',
    [
        r'spiral{dlm}in'.format(dlm=delimiter),
        r'spiral{dlm}out'.format(dlm=delimiter),
    ]
)

spiral_context = MultiRexTagger('spiral', [
    r'spiral',
], [
    r'mri',
    r'bold',
    r'imag',
    r'scan',
    r'data',
Example #12
0
from neurotrends.config import re
from neurotrends.tagger import RexTagger, MultiRexTagger
from ..misc import delimiter

# Spelled-out vendor name
ge = RexTagger('ge', [
    r'general{dlm}electric'.format(dlm=delimiter),
])

# Use MultiRexTagger for GE to disambiguate names: e.g. GE Smith
ge_context = MultiRexTagger('ge', [
    re.compile(r'\bGE\b'),
], [
    r'mri',
    r'scan',
    r'tesla',
])

siemens = RexTagger('siemens', [r'siemens'])

philips = RexTagger(
    'philips',
Example #13
0
# "surf rend" preceded by a non-word character
surfrend = RexTagger(
    'surfrend',
    [
        r'\Wsurf{dlm}rend'.format(dlm=delimiter),
    ]
)

# Full toolkit name, author attribution, or one of the project URLs
rest = RexTagger(
    'rest',
    [
        r'resting{dlm}state{dlm}fmri{dlm}data{dlm}analysis{dlm}toolkit'.format(dlm=delimiter),
        r'rest{dlm}(by)?{dlm}song{dlm}xiao'.format(dlm=delimiter),
        r'resting\-fmri\.sourceforge\.net',
        r'sourceforge\.net/projects/resting\-fmri',
        r'restfmri\.net',
    ]
)

# Pre-compiled "AAL" between non-word characters, or the spelled-out name
aal = RexTagger(
    'aal',
    [
        re.compile(r'\WAAL\W'),
        r'automatic{dlm}anatomic(al)?{dlm}label'.format(dlm=delimiter),
    ]
)

# "snpm" between non-word characters, or the spelled-out name
snpm = RexTagger(
    'snpm',
    [
        r'\Wsnpm\W',
        r'statistical{dlm}non{dlm}parametric{dlm}mapping'.format(dlm=delimiter),
    ]
)

# "spmd" between non-word characters, or the spelled-out name; the
# multi-line literal keeps its embedded whitespace unchanged
spmd = RexTagger(
    'spmd',
    [
        r'\Wspmd\W',
        r'''
            statistical{dlm}parametric{dlm}mapping{dlm}diagnosis
        '''.format(dlm=delimiter),
    ]
)
Example #14
0
def rex_compile(rex, flags=DEF_FLAGS):
    """Return rex as a compiled pattern.

    An already-compiled pattern is returned unchanged (flags is ignored for
    it); anything else is compiled with the given flags.
    """
    already_compiled = isinstance(rex, REX_TYPE)
    return rex if already_compiled else re.compile(rex, flags=flags)
Example #15
0
        r'fiasco',
        r'''
            functional{dlm}imaging{dlm}
                analysis{dlm}software'
        '''.format(dlm=delimiter),
    ],
)

fidl = RexTagger('fidl', [r'fidl'])

fiswidgets = RexTagger('fiswidgets', [r'fiswidgets'])

# Pre-compiled whole-word "ITK", the project domain, or the spelled-out name
itk = RexTagger('itk', [
    re.compile(r'\bITK\b'),
    r'itk\.org',
    r'insight{dlm}tool{dlm}kit'.format(dlm=delimiter),
])
Example #16
0
import functools

from neurotrends.config import re

# Type of a compiled pattern object, obtained by compiling an empty pattern
REX_TYPE = type(re.compile(''))
# Default flags: the re module's IGNORECASE and VERBOSE flags combined
DEF_FLAGS = re.IGNORECASE | re.VERBOSE
# NOTE(review): presumably a character-count limit used elsewhere -- confirm
NCHAR = 50
# NOTE(review): presumably the placeholder for an unidentified version --
# confirm against callers
UNKNOWN_VERSION = '?'


def rex_wrap(rex, wrap=''):
    """Enclose a pattern in a regex group.

    :param rex: Pattern text to enclose
    :param wrap: Text placed right after the opening parenthesis, e.g.
        '?:'; the default '' yields a plain capturing group
    :return: Unicode string of the form '(' + wrap + rex + ')'
    """
    # The template contains no backslashes, so a plain u'' literal has the
    # same value as the former ur'' literal. Unlike ur'', it is also valid
    # syntax on Python 3.
    return u'({wrap}{rex})'.format(rex=rex, wrap=wrap)


def rex_named(rex, name):
    """Wrap rex in a named capturing group called name (via rex_wrap)."""
    named_prefix = '?P<{}>'.format(name)
    return rex_wrap(rex, named_prefix)


# Shortcuts for rex_wrap with the group prefix fixed in advance
rex_noncap = functools.partial(rex_wrap, wrap='?:')  # (?:...) non-capturing group
rex_posbehind = functools.partial(rex_wrap, wrap='?<=')  # (?<=...) positive look-behind
rex_negbehind = functools.partial(rex_wrap, wrap='?<!')  # (?<!...) negative look-behind
rex_posahead = functools.partial(rex_wrap, wrap='?=')  # (?=...) positive look-ahead
rex_negahead = functools.partial(rex_wrap, wrap='?!')  # (?!...) negative look-ahead


# TODO: Only keep earliest tags
def uextend(base, vals):
    """Extend base in place with the items of vals it does not yet contain.

    Items are appended in the order they appear in vals.
    """
    for item in vals:
        if item in base:
            continue
        base.append(item)
Example #17
0
    ],
)

# A response term followed, within the separator span, by "first derivative"
# NOTE(review): the separator presumably limits the gap between the two
# patterns to text without sentence punctuation -- confirm against
# MultiRexTagger
tmpdrv_context = MultiRexTagger(
    "tmpdrv",
    [
        r"(hrf|hdr|ha?emodynamic{dlm}response)".format(dlm=delimiter),
    ],
    [
        r"first{dlm}derivative".format(dlm=delimiter),
    ],
    separator="[^.,:;?]*",
)

dspdrv = RexTagger(
    "dspdrv",
    [
        r"dispersion{dlm}derivative".format(dlm=delimiter),
    ],
)

# Pre-compiled "FIR" between non-word characters, "fir basis" / "fir set",
# or the spelled-out name
fir = RexTagger(
    "fir",
    [
        re.compile(r"\WFIR\W"),
        r"\Wfir{dlm}\)?(basis|set)".format(dlm=delimiter),
        r"finite{dlm}impulse{dlm}response".format(dlm=delimiter),
    ],
)

fir_context = MultiRexTagger(
    "fir",
    [
        r"not?{dlm}assum".format(dlm=delimiter),
        r"not{dlm}make{dlm}(any)?{dlm}assum".format(dlm=delimiter),
        r"ma[dk](es?|ing)?{dlm}no{dlm}assum".format(dlm=delimiter),
    ],
    [
        hrf_ptn,
        r"response{dlm}shape".format(dlm=delimiter),
Example #18
0
alphasim = RexTagger('alphasim', [
    r'alpha{dlm}sim'.format(dlm=delimiter),
    r'clustsim',
])

# NOTE(review): the two lists are presumably primary and accompanying
# patterns that must co-occur -- confirm against MultiRexTagger
alphasim_context = MultiRexTagger('alphasim', [
    r'monte{dlm}carlo'.format(dlm=delimiter),
], [
    r'rest{dlm}fmri'.format(dlm=delimiter),
    re.compile(r'AFNI'),
    re.compile(r'REST'),
])

# Same patterns as alphasim plus "monte carlo correct"
monte = RexTagger('monte', [
    r'alpha{dlm}sim'.format(dlm=delimiter),
    r'clustsim',
    r'monte{dlm}carlo{dlm}correct'.format(dlm=delimiter),
])

monte_context = MultiRexTagger(
    'monte',
Example #19
0
        r'event{dlm}related{dlm}tatistic'.format(dlm=delimiter),
        r'event{dlm}related{dlm}functional{dlm}mri'.format(dlm=delimiter),
        r'event{dlm}related{dlm}functional{dlm}magnetic'.format(dlm=delimiter),
        r'\Wer{dlm}fmri\W'.format(dlm=delimiter),
        r'single{dlm}event{dlm}design'.format(dlm=delimiter),
        r'event{dlm}of{dlm}interest'.format(dlm=delimiter),
        r'stick{dlm}function'.format(dlm=delimiter),
    ],
    rexes_negative=[
        r'\Wpp\.{dlm}\d+'.format(dlm=delimiter),
        r'\d+[\s-]+\d+',
        r'neuroimage',
        r'neuron',
        r'proceedings',
        r'transactions',
        re.compile(r'Trans[^a-zA-Z]'),
        re.compile(r'Research'),
        r'pnas',
        r'biol(ogical)?{dlm}psych'.format(dlm=delimiter),
        r'j(ournal{dlm}of)?{dlm}neurosci'.format(dlm=delimiter),
        r'arch(ives{dlm}of)?{dlm}gen'.format(dlm=delimiter),
        r'brain{dlm}cogn'.format(dlm=delimiter),
        r'journal',
        r'plos',
        r'frontiers',
    ]
)

block = RexTagger(
    'block',
    [
Example #20
0
    },
    'pdf': {
        'class': PDFDocument,
        'field': 'publisher_pdf',
    },
}

# Some PubMed records encode dates in unhelpful formats like "2006 May-Aug"
# or "1999 Jan 1-15". This pattern extracts the useful part of these strings
# ("2006 May", "1999 Jan") and discards the rest.
month_range_pattern = re.compile(
    r'''
        (
            \d{4}
            \s
            [a-z]{3}
        )
        [\s\-]
        .*
    ''',
    re.I | re.X,
)


# TODO: Test me
def parse_publication_date(date_text):
    """Parse a publication date string.

    Text following a "<year> <month>" prefix is dropped using
    month_range_pattern before the result is handed to dateparser.parse.

    :param date_text: Raw date text
    :return: Result of dateparser.parse, or None if the input is not a
        string or cannot be parsed
    """
    try:
        # The substitution sits inside the try so that non-string input
        # (e.g. None) yields None like any other unparseable value instead
        # of raising TypeError before the handler is reached.
        date_text = re.sub(month_range_pattern, '\\1', date_text)
        return dateparser.parse(date_text)
    except (TypeError, ValueError, AttributeError):
        return None
Example #21
0
        r'fiasco',
        r'''
            functional{dlm}imaging{dlm}
                analysis{dlm}software'
        '''.format(dlm=delimiter),
    ],
)

fidl = RexTagger('fidl', [r'fidl'])

fiswidgets = RexTagger('fiswidgets', [r'fiswidgets'])

# Pre-compiled whole-word "ITK", the project domain, or the spelled-out name
itk = RexTagger('itk', [
    re.compile(r'\bITK\b'),
    r'itk\.org',
    r'insight{dlm}tool{dlm}kit'.format(dlm=delimiter),
])
Example #22
0
    'time{dlm}(and{dlm}dispersion)?{dlm}derivative'.format(dlm=delimiter),
])

# A response term followed, within the separator span, by "first derivative"
# NOTE(review): the separator presumably limits the gap between the two
# patterns to text without sentence punctuation -- confirm against
# MultiRexTagger
tmpdrv_context = MultiRexTagger(
    'tmpdrv',
    [
        r'(hrf|hdr|ha?emodynamic{dlm}response)'.format(dlm=delimiter),
    ],
    [
        r'first{dlm}derivative'.format(dlm=delimiter),
    ],
    separator='[^.,:;?]*'
)

dspdrv = RexTagger('dspdrv', [
    r'dispersion{dlm}derivative'.format(dlm=delimiter),
])

# Pre-compiled "FIR" between non-word characters, "fir basis" / "fir set",
# or the spelled-out name
fir = RexTagger('fir', [
    re.compile(r'\WFIR\W'),
    r'\Wfir{dlm}\)?(basis|set)'.format(dlm=delimiter),
    r'finite{dlm}impulse{dlm}response'.format(dlm=delimiter),
])

# Phrases about making no assumptions, paired with a term describing the
# response
fir_context = MultiRexTagger(
    'fir',
    [
        r'not?{dlm}assum'.format(dlm=delimiter),
        r'not{dlm}make{dlm}(any)?{dlm}assum'.format(dlm=delimiter),
        r'ma[dk](es?|ing)?{dlm}no{dlm}assum'.format(dlm=delimiter),
    ],
    [
        hrf_ptn,
        r'response{dlm}shape'.format(dlm=delimiter),
        r'shape{dlm}of{dlm}(the)?{dlm}response'.format(dlm=delimiter),
        # Spelling corrected from "cource", which could never match the
        # phrase "time course"
        r'time{dlm}course'.format(dlm=delimiter),
    ],
    separator='[^.,:;?]*'
)
Example #23
0
def get_matlab_versions(overwrite=False):
    """Get MATLAB versions from Wikipedia.

    Results are cached in a shelf file under trendpath.data_dir. The cached
    copy is returned when that file exists and overwrite is false;
    otherwise the release-history table of the Wikipedia article is
    downloaded, parsed and saved to the shelf.

    :param overwrite: Overwrite existing data?
    :return: Dictionary mapping each version number to a list holding that
        number followed, when present, by the release-name pattern

    """
    # Get version file
    version_file = os.path.join(trendpath.data_dir, 'matlab-versions.shelf')

    # Use saved versions if version file exists and not overwrite
    if os.path.exists(version_file) and not overwrite:
        shelf = shelve.open(version_file)
        try:
            return shelf['versions']
        finally:
            # Close the shelf even if the key is missing or unreadable
            shelf.close()

    # Open Wikipedia page
    response = requests.get('http://en.wikipedia.org/wiki/MATLAB')
    soup = BeautifulSoup(response.content)

    # Find "Release History" table
    history_headline = soup.find(id='Release_history')
    history_table = history_headline.find_next(
        'table',
        class_=re.compile(r'wikitable'),
    )
    history_rows = history_table.find_all('tr')

    # Initialize Matlab versions
    versions = {}

    # The first row is skipped (treated as the table header)
    for row in history_rows[1:]:

        # Get <td> elements
        tds = row.find_all('td')

        # Rows lacking a version-number or version-name cell cannot be
        # parsed; skip them instead of failing with IndexError
        if len(tds) < 2:
            continue

        # Get version number
        version_number = re.sub(r'matlab\s+', '', tds[0].text, flags=re.I)

        # Get version name
        version_name = tds[1].text

        # "Service Pack" -> "sp". This runs before the "r" rewrite below,
        # which would otherwise turn "Service" into "Ser?vice" and keep
        # this pattern from matching the spelled-out form.
        version_name = re.sub(
            r'{dlm}(sp|service pack){dlm}'.format(dlm=delimiter),
            'sp',
            version_name,
            flags=re.I
        )

        # Make "r" in e.g. "r2007a" optional
        version_name = re.sub('r', 'r?', version_name, flags=re.I)

        # Add to versions
        versions[version_number] = [version_number]
        if version_name:
            versions[version_number].append(version_name)

    # Save results to version file
    shelf = shelve.open(version_file)
    try:
        shelf['versions'] = versions
    finally:
        shelf.close()

    # Return versions
    return versions
Example #24
0
def rex_compile(rex, flags=DEF_FLAGS):
    """Compile rex with flags unless it is already a compiled pattern.

    Already-compiled patterns are handed back untouched, so flags has no
    effect on them.
    """
    if not isinstance(rex, REX_TYPE):
        rex = re.compile(rex, flags=flags)
    return rex
Example #25
0
# -*- coding: utf-8 -*-

from neurotrends.config import re
from neurotrends.tagger import RexTagger, MultiRexTagger
from ..misc import delimiter

# Spelled-out vendor name
ge = RexTagger('ge', [
    r'general{dlm}electric'.format(dlm=delimiter),
])

# Use MultiRexTagger for GE to disambiguate names: e.g. GE Smith
ge_context = MultiRexTagger('ge', [
    re.compile(r'\bGE\b'),
], [
    r'mri',
    r'scan',
    r'tesla',
])

siemens = RexTagger('siemens', [r'siemens'])

philips = RexTagger('philips', [r'philips'])

bruker = RexTagger('bruker', [r'bruker'])

# Trailing \W requires a non-word character after "varian"
varian = RexTagger('varian', [r'varian\W'])

# The pattern previously read "shimazdu", which never matches the name
# given in the tag label. The misspelled variable name is kept so existing
# references keep working.
shimazdu = RexTagger('shimadzu', [r'shimadzu'])

marconi = RexTagger('marconi', [r'marconi'])
Example #26
0
from neurotrends.tagger import RexTagger, MultiRexTagger
from misc import delimiter

# Secondary patterns passed to the operating-system context tagger below
os_secondary_ptn = [
    r'\Wpc\W',
    r'\Wos\W',
    r'operating',
    r'platform',
    r'environment',
    r'workstation',
]

# Pre-compiled "OS X" (optional -, / or whitespace in between), "mac os"
# between non-word characters, or "spss for mac"
mac = RexTagger('mac', [
    re.compile(r'OS[-/\s]*X'),
    r'\Wmac{dlm}os\W'.format(dlm=delimiter),
    r'spss{dlm}for{dlm}mac'.format(dlm=delimiter),
])

# "apple" / "mac" / "macintosh" combined with one of the secondary patterns
mac_context = MultiRexTagger('mac', [
    r'apple',
    r'mac(intosh)?',
], os_secondary_ptn, separator='[^.,:;?]*')
Example #27
0
    'pdf': {
        'class': PDFDocument,
        'field': 'publisher_pdf',
    },
}


# Some PubMed records encode dates in unhelpful formats like "2006 May-Aug"
# or "1999 Jan 1-15". This pattern extracts the useful part of these strings
# ("2006 May", "1999 Jan") and discards the rest.
month_range_pattern = re.compile(
    r'''
        (
            \d{4}
            \s
            [a-z]{3}
        )
        [\s\-]
        .*
    ''',
    re.I | re.X,
)


# TODO: Test me
def parse_publication_date(date_text):
    """Parse a publication date string.

    Text following a "<year> <month>" prefix is dropped using
    month_range_pattern before the result is handed to dateparser.parse.

    :param date_text: Raw date text
    :return: Result of dateparser.parse, or None if the input is not a
        string or cannot be parsed
    """
    try:
        # The substitution sits inside the try so that non-string input
        # (e.g. None) yields None like any other unparseable value instead
        # of raising TypeError before the handler is reached.
        date_text = re.sub(month_range_pattern, '\\1', date_text)
        return dateparser.parse(date_text)
    except (TypeError, ValueError, AttributeError):
        return None
Example #28
0
# NOTE(review): presumably the category label applied to the taggers defined
# in this module -- confirm where it is read
category = 'analysis'

from neurotrends.config import re
from neurotrends.tagger import RexTagger, MultiRexTagger
from misc import delimiter

# Pre-compiled "KDA" between non-word characters
kda = RexTagger('kda', [
    re.compile(r'\WKDA\W'),
])

# Spelled-out name combined with "multi level"
# NOTE(review): the separator presumably limits the gap between the two
# patterns to text without sentence punctuation -- confirm against
# MultiRexTagger
kda_context = MultiRexTagger('kda', [
    r'kernel{dlm}density{dlm}analysis'.format(dlm=delimiter),
], [
    r'multi{dlm}level'.format(dlm=delimiter),
], separator='[^.,:;?]*')

mkda = RexTagger(
    'mkda',
    [
        r'''
            multi{dlm}level{dlm}kernel{dlm}density{dlm}analysis
        '''.format(dlm=delimiter),
        r'multi{dlm}level{dlm}kda'.format(dlm=delimiter),