def test_keep_parens():
    """
    Parenthesized text is often legitimate, so it must pass through
    unchanged whether it opens or closes the value.
    """
    for value in ('(abc) def', 'abc (def)'):
        assert prettify_field(value) == value
def test_space_on_left():
    """
    Whitespace at the start of the value is removed.
    """
    cleaned = prettify_field(' abc')
    assert cleaned == 'abc'
def test_punctuation_on_right():
    """
    Trailing punctuation (here a period, semicolon and comma) is removed.
    """
    raw, expected = 'abc.;,', 'abc'
    assert prettify_field(raw) == expected
def test_punctuation_on_left():
    """
    Leading punctuation (here a period, semicolon and comma) is removed.
    """
    with_leading_punctuation = '.;,abc'
    result = prettify_field(with_leading_punctuation)
    assert result == 'abc'
def rank_texts(keywords=None, state=None, institution=None):
    """
    Pull text rankings, optionally narrowed by the given filters.

    Args:
        keywords (str): Applied via `Ranking.filter_keywords` when truthy.
        state (str): Applied via `Ranking.filter_state` when truthy.
        institution (int): Applied via `Ranking.filter_institution` when
            truthy.

    Returns:
        dict: `count`, copied from the ranking result, and `texts`, a list
        with one dict per ranked entry, in the order the ranking returned
        them.
    """

    def _as_text(entry):
        # Flatten one ranked entry into the fields returned to the caller.
        record = entry['record']
        return {
            'id': record.id,
            'title': prettify_field(record.marc.title()),
            'author': prettify_field(record.marc.author()),
            'publisher': prettify_field(record.marc.publisher()),
            'rank': entry['rank'],
            't_count': record.metadata['citation_count'],
            'f_count': record.count,
        }

    ranking = Ranking()

    # Only filters with a truthy value are applied.
    if keywords:
        ranking.filter_keywords(keywords)
    if state:
        ranking.filter_state(state)
    if institution:
        ranking.filter_institution(institution)

    ranked = ranking.rank()
    texts = [_as_text(entry) for entry in ranked['ranks']]

    return {
        'count': ranked['count'],
        'texts': texts
    }
def es_doc(self):
    """
    Build the Elasticsearch document for this record.

    Returns:
        dict: `_id` (the control number), the `author`, `title` and
        `publisher` MARC values passed through `prettify_field`, and
        `count` / `rank` read from the `citation_count` and `rank`
        entries of `self.metadata`.
    """
    doc = {'_id': self.control_number}

    # Bibliographic fields, each run through prettify_field.
    doc['author'] = prettify_field(self.marc.author())
    doc['title'] = prettify_field(self.marc.title())
    doc['publisher'] = prettify_field(self.marc.publisher())

    # Count and rank are read straight from the record's metadata.
    doc['count'] = self.metadata['citation_count']
    doc['rank'] = self.metadata['rank']

    return doc
def rank_texts(keywords=None, state=None, institution=None):
    """
    Pull text rankings, optionally narrowed by the given filters.

    Args:
        keywords (str): Keyword filter; skipped when falsy.
        state (str): State filter; skipped when falsy.
        institution (int): Institution filter; skipped when falsy.

    Returns:
        dict: `count`, copied from the ranking result, and `texts`, a list
        with one dict per ranked entry, in the order the ranking returned
        them.
    """
    ranking = Ranking()

    # Each filter method is looked up and called only when its argument
    # is truthy, in the order keywords -> state -> institution.
    requested_filters = (
        ('filter_keywords', keywords),
        ('filter_state', state),
        ('filter_institution', institution),
    )
    for method_name, value in requested_filters:
        if value:
            getattr(ranking, method_name)(value)

    outcome = ranking.rank()

    texts = []
    for hit in outcome['ranks']:
        record = hit['record']
        entry = {
            'id': record.id,
            'title': prettify_field(record.marc.title()),
            'author': prettify_field(record.marc.author()),
            'publisher': prettify_field(record.marc.publisher()),
            'rank': hit['rank'],
            't_count': record.metadata['citation_count'],
            'f_count': record.count,
        }
        texts.append(entry)

    return {'count': outcome['count'], 'texts': texts}
def hydrate_nodes(self):
    """
    Hydrate node metadata.

    Each node key in `self.graph` is used as a control number to look up
    an `HLOM_Record`. The record's title, author, publisher and pubyear
    are passed through `prettify_field` and stored as attributes on the
    node, with any falsy result replaced by an empty string.
    """
    for cn in bar(self.graph.nodes()):

        # Look up the HLOM record whose control number is the node key.
        record = HLOM_Record.get(HLOM_Record.control_number == cn)

        # Compute every value before writing any, in a fixed order.
        attributes = (
            ('title', prettify_field(record.marc.title())),
            ('author', prettify_field(record.marc.author())),
            ('publisher', prettify_field(record.marc.publisher())),
            ('pubyear', prettify_field(record.marc.pubyear())),
        )

        # Falsy values are stored as an empty string.
        for key, value in attributes:
            self.graph.node[cn][key] = value or ''