def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None,
        author_name=None):
    """
    Build a search expression from the given criteria.

    Each supplied criterion becomes one clause, in this order: journal,
    text/MeSH terms (joined with OR), custom expression, author and
    publication date range. A single clause is returned bare; several
    clauses are each wrapped in parentheses and joined with AND.

    The date clause is only produced when from_date is given; to_date is
    ignored without it and falls back to '3000' when omitted.

    The result is passed through compact_whitespace before being returned.
    """
    clauses = []

    if journal_name:
        clauses.append('"%s"[Journal]' % journal_name)

    if text_terms or mesh_terms:
        text_clauses = ['"%s"[Text Word]' % term for term in text_terms or []]
        mesh_clauses = ['"%s"[MeSH Terms]' % term for term in mesh_terms or []]
        clauses.append(' OR '.join(text_clauses + mesh_clauses))

    if custom_expression:
        clauses.append(custom_expression)

    if author_name:
        clauses.append('%s[Author]' % author_name)

    if from_date:
        date_format = '%Y/%m/%d'
        lower_bound = from_date.strftime(date_format)
        if to_date:
            upper_bound = to_date.strftime(date_format)
        else:
            # Open-ended range
            upper_bound = '3000'
        clauses.append(
            '"%s"[Date - Publication] : "%s"[Date - Publication]' % (
                lower_bound, upper_bound))

    if len(clauses) > 1:
        query = '(%s)' % ') AND ('.join(clauses)
    else:
        # Zero or one clause needs no parentheses
        query = ''.join(clauses)
    return compact_whitespace(query)
def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None):
    """
    Build a search expression from the given criteria.

    Clauses are produced in this order: journal, text/MeSH terms (joined
    with OR), custom expression and publication date range. A single
    clause is returned as is; multiple clauses are parenthesized and
    joined with AND.

    The date clause requires from_date; when to_date is missing the upper
    bound is '3000'.

    The result is passed through compact_whitespace before being returned.
    """
    pieces = []

    if journal_name:
        pieces.append('"%s"[Journal]' % journal_name)

    if text_terms or mesh_terms:
        tagged_terms = ['"%s"[Text Word]' % word for word in text_terms or []]
        tagged_terms += ['"%s"[MeSH Terms]' % word for word in mesh_terms or []]
        pieces.append(' OR '.join(tagged_terms))

    if custom_expression:
        pieces.append(custom_expression)

    if from_date:
        start_string = from_date.strftime('%Y/%m/%d')
        end_string = '3000'
        if to_date:
            end_string = to_date.strftime('%Y/%m/%d')
        date_template = '"%s"[Date - Publication] : "%s"[Date - Publication]'
        pieces.append(date_template % (start_string, end_string))

    if len(pieces) <= 1:
        return compact_whitespace(''.join(pieces))
    return compact_whitespace('(%s)' % ') AND ('.join(pieces))
def get_expression(journal_name=None, text_terms=None, mesh_terms=None,
                   from_date=None, to_date=None, custom_expression=None,
                   author_name=None):
    """
    Assemble a search expression from the supplied criteria.

    Clauses are emitted in this order and, when there is more than one,
    each is wrapped in parentheses and joined with AND:

        "%s"[Journal]
        %s[Author]  or  %s[Author] AND (%s[Affiliation] OR ...)
        custom expression
        "%s"[Text Word] OR ... OR "%s"[MeSH Terms] OR ...
        "%s"[Date - Publication] : "%s"[Date - Publication]

    When PATTERN_AFFILIATION matches author_name, its two groups are used
    as the author name and a comma-separated list of affiliations.
    The date clause is only added when from_date is given; a missing
    to_date becomes '3000'.

    The result is passed through compact_whitespace before being returned.
    """
    clauses = []

    if journal_name:
        clauses.append('"{0}"[Journal]'.format(journal_name))

    if author_name:
        affiliation_match = PATTERN_AFFILIATION.search(author_name)
        if not affiliation_match:
            clauses.append('{0}[Author]'.format(author_name))
        else:
            author, raw_affiliations = affiliation_match.groups()
            affiliation_clauses = [
                '{0}[Affiliation]'.format(piece.strip())
                for piece in raw_affiliations.split(',')]
            clauses.append('{0}[Author] AND ({1})'.format(
                author, ' OR '.join(affiliation_clauses)))

    if custom_expression:
        clauses.append(custom_expression)

    if text_terms or mesh_terms:
        text_clauses = [
            '"{0}"[Text Word]'.format(term) for term in text_terms or []]
        mesh_clauses = [
            '"{0}"[MeSH Terms]'.format(term) for term in mesh_terms or []]
        clauses.append(' OR '.join(text_clauses + mesh_clauses))

    if from_date:
        date_format = '%Y/%m/%d'
        lower_bound = from_date.strftime(date_format)
        upper_bound = to_date.strftime(date_format) if to_date else '3000'
        clauses.append(
            '"{0}"[Date - Publication] : "{1}"[Date - Publication]'.format(
                lower_bound, upper_bound))

    if len(clauses) > 1:
        query = '({0})'.format(') AND ('.join(clauses))
    else:
        # Zero or one clause needs no parentheses
        query = ''.join(clauses)
    return compact_whitespace(query)
def get_expression(
        journal_name=None, text_terms=None, mesh_terms=None,
        from_date=None, to_date=None, custom_expression=None,
        author_name=None):
    """
    Compose a search expression out of the given filters.

    Each filter that is set contributes one part, in this order: journal,
    author (optionally with affiliations), custom expression, text/MeSH
    terms (joined with OR) and publication date range. With two or more
    parts the layout is:

        (part) AND (part) AND ...

    With a single part it is returned without parentheses.

    If PATTERN_AFFILIATION matches author_name, the two matched groups are
    taken as the author name and a comma-separated affiliation string;
    the affiliations are stripped and joined with OR.
    to_date is only consulted when from_date is given and defaults to
    '3000'.

    The result is passed through compact_whitespace before being returned.
    """
    parts = []
    add_part = parts.append

    if journal_name:
        add_part('"{0}"[Journal]'.format(journal_name))

    if author_name:
        found = PATTERN_AFFILIATION.search(author_name)
        if found:
            name, affiliation_text = found.groups()
            stripped = [item.strip() for item in affiliation_text.split(',')]
            affiliation_part = ' OR '.join(
                '{0}[Affiliation]'.format(item) for item in stripped)
            add_part('{0}[Author] AND ({1})'.format(name, affiliation_part))
        else:
            add_part('{0}[Author]'.format(author_name))

    if custom_expression:
        add_part(custom_expression)

    if text_terms or mesh_terms:
        tagged = ['"{0}"[Text Word]'.format(word) for word in text_terms or []]
        tagged += [
            '"{0}"[MeSH Terms]'.format(word) for word in mesh_terms or []]
        add_part(' OR '.join(tagged))

    if from_date:
        start_string = from_date.strftime('%Y/%m/%d')
        end_string = '3000'
        if to_date:
            end_string = to_date.strftime('%Y/%m/%d')
        date_template = '"{0}"[Date - Publication] : "{1}"[Date - Publication]'
        add_part(date_template.format(start_string, end_string))

    if len(parts) <= 1:
        return compact_whitespace(''.join(parts))
    return compact_whitespace('({0})'.format(') AND ('.join(parts)))
def get_expression(journal_name=None, text_terms=None, mesh_terms=None,
                   from_date=None, to_date=None, custom_expression=None,
                   author_name=None):
    """
    Build a search expression from the given inputs.

    Every input that is set adds one clause, in this order: journal,
    author, custom expression, text/MeSH terms (joined with OR) and
    publication date range. Multiple clauses are laid out as

        ("%s"[Journal]) AND (%s[Author]) AND
        ("%s"[Text Word] OR "%s"[MeSH Terms]) AND
        ("%s"[Date - Publication] : "%s"[Date - Publication])

    while a single clause is returned without parentheses.

    The date clause is skipped unless from_date is given; a missing
    to_date is replaced by '3000'.

    The result is passed through compact_whitespace before being returned.
    """
    clauses = []

    if journal_name:
        clauses.append('"%s"[Journal]' % journal_name)

    if author_name:
        clauses.append('%s[Author]' % author_name)

    if custom_expression:
        clauses.append(custom_expression)

    if text_terms or mesh_terms:
        text_clauses = ['"%s"[Text Word]' % term for term in text_terms or []]
        mesh_clauses = ['"%s"[MeSH Terms]' % term for term in mesh_terms or []]
        clauses.append(' OR '.join(text_clauses + mesh_clauses))

    if from_date:
        date_format = '%Y/%m/%d'
        lower_bound = from_date.strftime(date_format)
        if to_date:
            upper_bound = to_date.strftime(date_format)
        else:
            # Open-ended range
            upper_bound = '3000'
        clauses.append(
            '"%s"[Date - Publication] : "%s"[Date - Publication]' %
            (lower_bound, upper_bound))

    if len(clauses) > 1:
        query = '(%s)' % ') AND ('.join(clauses)
    else:
        query = ''.join(clauses)
    return compact_whitespace(query)
def test_compact_whitespace():
    """Check that compact_whitespace turns ' x y z ' into 'x y z'."""
    expected = 'x y z'
    actual = compact_whitespace(' x y z ')
    assert actual == expected
# Count how often each character occurs in a text file (after the text has
# been passed through compact_whitespace, and excluding the space character)
# and save the counts, most common first, as character_count.csv.
#
# Command-line arguments: TARGET_FOLDER TEXT_PATH
import csv
from collections import Counter
from invisibleroads_macros.disk import make_folder
from invisibleroads_macros.text import compact_whitespace
from os.path import join
from sys import argv


# Exactly two arguments are expected; anything else fails the unpacking
target_folder, text_path = argv[1:]

# Read the input inside a context manager so the handle is closed right away
with open(text_path) as text_file:
    text = text_file.read()
character_counter = Counter(compact_whitespace(text))
# Leave spaces out of the table (deleting a missing Counter key is harmless)
del character_counter[' ']

target_path = join(make_folder(target_folder), 'character_count.csv')
# Close the output explicitly so every row is flushed to disk before the
# path is announced.
# NOTE(review): on Python 3 the csv documentation recommends opening the
# file with newline=''; not added here in case Python 2 is still a target
# -- confirm.
with open(target_path, 'w') as target_file:
    csv.writer(target_file).writerows(character_counter.most_common())
print('character_count_table_path = ' + target_path)