Ejemplo n.º 1
0
def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None,
        author_name=None):
    """
    Assemble a search expression from the filters that were supplied.

    Every truthy filter contributes one clause, in this order: journal,
    text/MeSH terms (OR-ed together), custom expression, author and
    publication date range. A lone clause is used as is; several clauses
    are each wrapped in parentheses and joined with AND. The result is
    passed through compact_whitespace before it is returned.

    The date clause is only produced when from_date is given; to_date is
    ignored otherwise, and the upper bound falls back to '3000' when
    to_date is missing.
    """
    clauses = []
    if journal_name:
        clauses.append('"%s"[Journal]' % journal_name)
    if text_terms or mesh_terms:
        tagged_terms = ['"%s"[Text Word]' % term for term in text_terms or []]
        tagged_terms += [
            '"%s"[MeSH Terms]' % term for term in mesh_terms or []]
        clauses.append(' OR '.join(tagged_terms))
    if custom_expression:
        clauses.append(custom_expression)
    if author_name:
        clauses.append('%s[Author]' % author_name)
    if from_date:
        date_format = '%Y/%m/%d'
        range_start = from_date.strftime(date_format)
        if to_date:
            range_end = to_date.strftime(date_format)
        else:
            # Open-ended range: use a year far in the future as upper bound
            range_end = '3000'
        clauses.append(
            '"%s"[Date - Publication] : "%s"[Date - Publication]' % (
                range_start, range_end))
    if len(clauses) > 1:
        expression = '(' + ') AND ('.join(clauses) + ')'
    else:
        expression = ''.join(clauses)
    return compact_whitespace(expression)
Ejemplo n.º 2
0
def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None):
    """
    Compose a search expression from the optional filters.

    Clauses appear in this order: journal, text/MeSH terms (OR-ed together),
    custom expression and publication date range. Filters that are falsy
    are left out. With zero or one clause the clause text itself is used;
    with more, each clause is parenthesized and the clauses are joined with
    AND. The final string goes through compact_whitespace.

    to_date only matters when from_date is given; without to_date the range
    ends at '3000'.
    """
    journal_clause = terms_clause = date_clause = None
    if journal_name:
        journal_clause = '"%s"[Journal]' % journal_name
    if text_terms or mesh_terms:
        terms_clause = ' OR '.join(
            ['"%s"[Text Word]' % word for word in text_terms or []] +
            ['"%s"[MeSH Terms]' % heading for heading in mesh_terms or []])
    if from_date:
        lower_bound = from_date.strftime('%Y/%m/%d')
        upper_bound = to_date.strftime('%Y/%m/%d') if to_date else '3000'
        date_clause = (
            '"%s"[Date - Publication] : "%s"[Date - Publication]' % (
                lower_bound, upper_bound))
    # None marks a filter that was not requested; keep the original ordering
    candidates = [
        journal_clause, terms_clause, custom_expression or None, date_clause]
    clauses = [clause for clause in candidates if clause is not None]
    if len(clauses) <= 1:
        return compact_whitespace(''.join(clauses))
    return compact_whitespace('(' + ') AND ('.join(clauses) + ')')
Ejemplo n.º 3
0
def get_expression(journal_name=None,
                   text_terms=None,
                   mesh_terms=None,
                   from_date=None,
                   to_date=None,
                   custom_expression=None,
                   author_name=None):
    """
    Build a search expression out of whichever filters are given.

    Clauses are emitted in this order: journal, author, custom expression,
    text/MeSH terms (OR-ed together) and publication date range, e.g.

    ("%s"[Journal]) AND ("%s"[Text Word] OR "%s"[MeSH Terms]) AND
    ("%s"[Date - Publication] : "%s"[Date - Publication])

    If PATTERN_AFFILIATION matches author_name, the match is unpacked into
    an author and a comma-separated affiliation string; the stripped
    affiliations are OR-ed and AND-ed with the author. A single clause is
    returned without parentheses. The date clause requires from_date, and
    its upper bound defaults to '3000'. The result is passed through
    compact_whitespace.
    """
    clauses = []
    if journal_name:
        clauses.append('"{0}"[Journal]'.format(journal_name))
    if author_name:
        affiliation_match = PATTERN_AFFILIATION.search(author_name)
        if not affiliation_match:
            clauses.append('{0}[Author]'.format(author_name))
        else:
            author, affiliation_text = affiliation_match.groups()
            affiliation_clause = ' OR '.join(
                '{0}[Affiliation]'.format(part.strip())
                for part in affiliation_text.split(','))
            clauses.append('{0}[Author] AND ({1})'.format(
                author, affiliation_clause))
    if custom_expression:
        clauses.append(custom_expression)
    if text_terms or mesh_terms:
        tagged_terms = [
            '"{0}"[Text Word]'.format(term) for term in text_terms or []]
        tagged_terms.extend(
            '"{0}"[MeSH Terms]'.format(term) for term in mesh_terms or [])
        clauses.append(' OR '.join(tagged_terms))
    if from_date:
        date_format = '%Y/%m/%d'
        range_start = from_date.strftime(date_format)
        range_end = '3000'
        if to_date:
            range_end = to_date.strftime(date_format)
        clauses.append(
            '"{0}"[Date - Publication] : "{1}"[Date - Publication]'.format(
                range_start, range_end))
    if len(clauses) > 1:
        joined = ') AND ('.join(clauses)
        expression = '({0})'.format(joined)
    else:
        expression = ''.join(clauses)
    return compact_whitespace(expression)
Ejemplo n.º 4
0
def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None,
        author_name=None):
    """
    Turn the supplied filters into one search expression string.

    One clause is generated per truthy filter, in the order journal, author,
    custom expression, text/MeSH terms, publication date range. Text and
    MeSH terms share a single clause and are separated by OR. Two or more
    clauses are each enclosed in parentheses and combined with AND; fewer
    are returned bare. compact_whitespace is applied to the final string.

    author_name is searched with PATTERN_AFFILIATION; on a match, the
    captured author and comma-separated affiliations form
    "<author>[Author] AND (<a>[Affiliation] OR ...)". The date range is
    only added when from_date is set, ending at '3000' if to_date is not.
    """
    def tag_author(raw_author):
        # Split off affiliations when the pattern recognizes them
        found = PATTERN_AFFILIATION.search(raw_author)
        if not found:
            return '{0}[Author]'.format(raw_author)
        name, affiliation_text = found.groups()
        stripped = [piece.strip() for piece in affiliation_text.split(',')]
        alternatives = ' OR '.join(
            '{0}[Affiliation]'.format(piece) for piece in stripped)
        return '{0}[Author] AND ({1})'.format(name, alternatives)

    parts = []
    if journal_name:
        parts.append('"{0}"[Journal]'.format(journal_name))
    if author_name:
        parts.append(tag_author(author_name))
    if custom_expression:
        parts.append(custom_expression)
    if text_terms or mesh_terms:
        words = ['"{0}"[Text Word]'.format(w) for w in text_terms or []]
        headings = ['"{0}"[MeSH Terms]'.format(h) for h in mesh_terms or []]
        parts.append(' OR '.join(words + headings))
    if from_date:
        start_text = from_date.strftime('%Y/%m/%d')
        end_text = to_date.strftime('%Y/%m/%d') if to_date else '3000'
        parts.append(
            '"{0}"[Date - Publication] : "{1}"[Date - Publication]'.format(
                start_text, end_text))
    if len(parts) <= 1:
        return compact_whitespace(''.join(parts))
    return compact_whitespace('({0})'.format(') AND ('.join(parts)))
Ejemplo n.º 5
0
def get_expression(journal_name=None,
                   text_terms=None,
                   mesh_terms=None,
                   from_date=None,
                   to_date=None,
                   custom_expression=None,
                   author_name=None):
    """
    Create a search expression from the given filters.

    The clauses, in order, are: journal, author, custom expression,
    text/MeSH terms (combined with OR) and publication date range.
    Falsy filters are skipped. Multiple clauses take the form

    (clause) AND (clause) AND ...

    while a single clause is returned without parentheses. The date range
    is only included when from_date is given and ends at '3000' when
    to_date is absent. compact_whitespace is applied to the result.
    """
    clauses = []
    if journal_name:
        clauses.append('"%s"[Journal]' % journal_name)
    if author_name:
        clauses.append('%s[Author]' % author_name)
    if custom_expression:
        clauses.append(custom_expression)
    if text_terms or mesh_terms:
        tagged_terms = []
        for term in text_terms or []:
            tagged_terms.append('"%s"[Text Word]' % term)
        for term in mesh_terms or []:
            tagged_terms.append('"%s"[MeSH Terms]' % term)
        clauses.append(' OR '.join(tagged_terms))
    if from_date:
        date_format = '%Y/%m/%d'
        bounds = (
            from_date.strftime(date_format),
            to_date.strftime(date_format) if to_date else '3000',
        )
        clauses.append(
            '"%s"[Date - Publication] : "%s"[Date - Publication]' % bounds)
    if len(clauses) > 1:
        expression = '(%s)' % ') AND ('.join(clauses)
    else:
        expression = ''.join(clauses)
    return compact_whitespace(expression)
def test_compact_whitespace():
    """Padded, double-spaced input should come back as 'x y z'."""
    padded_text = '  x  y  z  '
    compacted_text = compact_whitespace(padded_text)
    assert compacted_text == 'x y z'
Ejemplo n.º 7
0
import csv
from collections import Counter
from invisibleroads_macros.disk import make_folder
from invisibleroads_macros.text import compact_whitespace
from os.path import join
from sys import argv

# Count how often each character occurs in a text file and save the counts,
# most frequent first, to character_count.csv inside the target folder.
# Exactly two command-line arguments are expected: target folder, text path.
target_folder, text_path = argv[1:]
# Close the input file as soon as it is read instead of leaking the handle
with open(text_path) as text_file:
    character_counter = Counter(compact_whitespace(text_file.read()))
# Spaces are not reported; deleting a missing Counter key is harmless
del character_counter[' ']
# NOTE(review): make_folder presumably creates and returns the folder path
target_path = join(make_folder(target_folder), 'character_count.csv')
# Leaving the with block flushes and closes the table before its path is
# announced, so a consumer reading the printed path sees complete data
with open(target_path, 'w') as target_file:
    csv_writer = csv.writer(target_file)
    csv_writer.writerows(character_counter.most_common())
print('character_count_table_path = ' + target_path)
def test_compact_whitespace():
    """Verify the expected output for input with extra spaces throughout."""
    expected_text = 'x y z'
    actual_text = compact_whitespace('  x  y  z  ')
    assert actual_text == expected_text