def create_db_entries(record):
    """Create the database rows describing one article record.

    Reads the keys ``'journal'``, ``'pubmedUrl'``, ``'title'``,
    ``'abstract'`` and ``'authors'`` from *record* and, via
    ``get_or_create``, makes sure the matching ``Journal``, ``Article``,
    ``Author``, ``Order``, ``Term`` and ``Frequency`` rows exist.

    Args:
        record: Mapping with the keys listed above. ``record['authors']``
            is an iterable of strings, each split into a last name
            followed by initials.

    Returns:
        None. The function is called for its database side effects.

    Raises:
        KeyError: If *record* lacks one of the required keys.
        ValueError: If an author string does not contain at least two
            whitespace-separated parts (last name and initials).
    """
    journal, _ = Journal.objects.get_or_create(name=record['journal'])
    article, _ = Article.objects.get_or_create(
        pubmed_url=record['pubmedUrl'],
        title=record['title'],
        abstract=record['abstract'],
        journal=journal,
    )

    # Authors are stored together with their zero-based position in the
    # article's author list.
    for author_order, item in enumerate(record['authors']):
        # Split on the LAST run of whitespace so that multi-word last
        # names ("van Dijk J") keep the trailing token as the initials
        # instead of failing to unpack.
        lname, initials = item.rsplit(None, 1)
        author, _ = Author.objects.get_or_create(
            initials=initials, last_name=lname)
        Order.objects.get_or_create(
            author=author, article=article, order=author_order)

    # Term frequencies are counted over the title and abstract together.
    raw_terms = ' '.join((record['title'], record['abstract']))
    clean_terms = [clean_term(term) for term in raw_terms.split()]
    if settings.USE_STOP_WORDS:
        clean_terms = [term for term in clean_terms
                       if term not in STOP_WORDS]

    # .items() is available on both Python 2 and 3; .iteritems() is
    # Python 2 only.
    for key, frequency in Counter(clean_terms).items():
        term, _ = Term.objects.get_or_create(term=key)
        Frequency.objects.get_or_create(
            term=term, article=article, frequency=frequency)
def test_clean_term(self):
    """Check that clean_term yields the expected form for sample words.

    Every sample word is passed through ``clean_term``; each expected
    cleaned form must then appear among the results.
    """
    raw_words = ('Clin.', 'Chem.', 'Implementation', 'closed-loop',
                 'commission.')
    expected_forms = ('clin', 'chem', 'implementation', 'closed-loop',
                      'commission')

    cleaned_words = list(map(clean_term, raw_words))

    for expected in expected_forms:
        self.assertIn(expected, cleaned_words)