Example #1
0
def create_db_entries(record):
    """Create (or fetch) the DB model objects described by one JSON article.

    Reads the ``journal``, ``pubmedUrl``, ``title``, ``abstract`` and
    ``authors`` keys of ``record`` and, via ``get_or_create``, ensures the
    matching Journal, Article, Author, Order, Term and Frequency rows exist.

    Each entry of ``record['authors']`` is split into a last name and
    initials on its final run of whitespace, so the initials are taken to be
    the last whitespace-separated token (assumes a "Lastname Initials"
    layout -- confirm against the data source).

    Returns None.

    Raises:
        KeyError: if ``record`` lacks one of the keys listed above.
        ValueError: if an author entry contains no whitespace and therefore
            cannot be split into last name and initials.
    """
    journal, _ = Journal.objects.get_or_create(name=record['journal'])

    article, _ = Article.objects.get_or_create(pubmed_url=record['pubmedUrl'],
                                               title=record['title'],
                                               abstract=record['abstract'],
                                               journal=journal)

    # The position in the authors list is stored as the author's order.
    for author_order, item in enumerate(record['authors']):
        # Split only on the last whitespace run: a plain split() would yield
        # more than two parts for multi-word surnames ("van der Berg A") and
        # make the two-name unpacking fail.
        lname, initials = item.rsplit(None, 1)
        author, _ = Author.objects.get_or_create(initials=initials,
                                                 last_name=lname)
        Order.objects.get_or_create(author=author,
                                    article=article,
                                    order=author_order)

    # Term frequencies are counted over the title and abstract together.
    raw_terms = ' '.join((record['title'],
                          record['abstract']))
    clean_terms = [clean_term(term) for term in raw_terms.split()]
    if settings.USE_STOP_WORDS:
        clean_terms = [term for term in clean_terms if term not in STOP_WORDS]

    # .items() rather than .iteritems(): the latter does not exist on Python 3.
    for key, frequency in Counter(clean_terms).items():
        term, _ = Term.objects.get_or_create(term=key)
        Frequency.objects.get_or_create(term=term,
                                        article=article,
                                        frequency=frequency)
Example #2
0
 def test_clean_term(self):
     """Check clean_term's output for a handful of sample words."""
     raw_words = ('Clin.', 'Chem.', 'Implementation', 'closed-loop', 'commission.')
     cleaned = list(map(clean_term, raw_words))
     # Each expected form must appear somewhere among the cleaned results.
     expected_forms = ('clin', 'chem', 'implementation', 'closed-loop', 'commission')
     for expected in expected_forms:
         self.assertIn(expected, cleaned)