Beispiel #1
0
 def doi_article(ref, user=None):
     """Fetch a PubMed article for every DOI attached to *ref*.

     :param ref: reference object whose DOIs are listed by Reference.doi(ref)
     :param user: unused; kept for signature parity with sibling helpers
     :return: list of fetched article objects, one per DOI
     """
     article_array = []
     fetch = PubMedFetcher()
     for doi in Reference.doi(ref):
         article = fetch.article_by_doi(doi)
         article_array.append(article)
     # BUG FIX: previously returned only the last fetched `article` (and
     # raised NameError when there were no DOIs); return the collected list,
     # matching the sibling pmid_article helper.
     return article_array
Beispiel #2
0
    def processPMID(self, description, document, text):
        """Strip "[PMID nnn]" / "[PMCID nnn]" citation tags from *description*,
        write the cleaned paragraphs plus a standard closing sentence into
        *document* (python-docx style: add_paragraph/add_run), then append one
        italicised citation line per referenced PMID/PMCID fetched from PubMed.

        NOTE(review): the ``ur'...'`` literal below is Python-2-only syntax;
        this method will not parse under Python 3.
        """

        # Collect the cited PMID/PMCID numbers, then remove the bracketed
        # citation tags from the visible text.
        pmid = re.compile('PMID *(\d+)')
        list_pmid = pmid.findall(description)
        description = re.sub(r'\[PMID *\d+\]', '', description)
        pmcid = re.compile('PMCID *(\d+)')
        list_pmcid = pmcid.findall(description)
        description = re.sub(r'\[PMCID *\d+\]', '', description)
        # Splits on the two-character sequence backslash-n (raw string), i.e.
        # literal "\n" markers embedded in the text, not real newlines —
        # presumably how the source text encodes paragraph breaks; verify.
        para = description.split(ur'\n')
        for para_str in para:
            # Each paragraph starts with a literal tab as a first-line indent.
            p = document.add_paragraph('	')
            p.add_run(para_str)
        # Standard closing sentence (Chinese runtime string, kept verbatim)
        # with the caller-supplied *text* spliced in.
        std_str = u"我们通过检测您的基因位点,使用PUBMED等国际公认参考系统,我们认为" + text + u"。"
        p = document.add_paragraph('	')
        p.add_run(std_str)

        # Append one italic citation line ("Title. Journal") per article.
        fetch = PubMedFetcher()
        for pmid in list_pmid:
            # e.g. http://www.ncbi.nlm.nih.gov/pubmed/26471457
            pm = fetch.article_by_pmid(pmid)
            title = pm.title
            title = re.sub('\.', '', title)  # drop periods so the join reads cleanly
            citation = '. '.join([title, pm.journal])
            p = document.add_paragraph()
            p.add_run(citation).italic = True

        for pmcid in list_pmcid:
            pm = fetch.article_by_pmcid(pmcid)
            title = pm.title
            title = re.sub('\.', '', title)
            citation = '. '.join([title, pm.journal])
            p = document.add_paragraph()
            p.add_run(citation).italic = True
Beispiel #3
0
 def pmcid_article(ref, user=None):
     """Fetch a PubMed article for every PMCID attached to *ref*.

     :param ref: reference object whose PMCIDs are listed by Reference.pmcid(ref)
     :param user: unused; kept for signature parity with sibling helpers
     :return: list of fetched article objects, one per PMCID
     """
     article_array = []
     fetch = PubMedFetcher()
     for pmcid in Reference.pmcid(ref):
         article = fetch.article_by_pmcid(pmcid)
         article_array.append(article)
     # BUG FIX: previously returned only the last fetched `article` (and
     # raised NameError when there were no PMCIDs); return the full list.
     return article_array
Beispiel #4
0
 def pmcid_article(ref, user=None):
     """Return the fetched article objects for all PMCIDs of *ref*.

     :param ref: reference object; its PMCIDs come from Reference.pmcid(ref)
     :param user: unused; kept for signature parity with sibling helpers
     :return: list of fetched article objects
     """
     article_array = []
     fetch = PubMedFetcher()
     for pmcid in Reference.pmcid(ref):
         article_array.append(fetch.article_by_pmcid(pmcid))
     # BUG FIX: the original returned the loop-local `article` (last item
     # only, NameError when empty) instead of the accumulated list.
     return article_array
Beispiel #5
0
def get_info_by_DOI(DOI: str) -> Dict:
    """Request article metadata for *DOI*, first via metapub (PubMed) and,
    on MetaPubError, via the Crossref API.

    :param DOI: the article's DOI string.
    :return: dict of article metadata, tagged with retrieval provenance.
    """
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_doi(DOI)
        # Copy every public attribute into the dict.  getattr() replaces the
        # original eval('article.' + info), which executed arbitrary strings.
        for info in dir(article):
            if not info.startswith('_'):
                article_dict[info] = getattr(article, info)
        # Add data retrieval provenance to the dict.
        article_dict = add_retrieval_information(article_dict, 'MetaPub',
                                                 'DOI', DOI)
    except MetaPubError:
        # Fall back to the Crossref API; retry up to 5 times on transient
        # failures (e.g. timeouts), keeping the best-effort behaviour.
        for _ in range(5):
            try:
                works = Works()
                article_dict = works.doi(DOI)
                break
            except Exception:  # narrowed from bare except
                pass
        article_dict = add_retrieval_information(article_dict, 'Crossref',
                                                 'DOI', DOI)
    return article_dict
Beispiel #6
0
 def doi_article(ref, user=None):
     """Return the fetched article objects for all DOIs of *ref*.

     :param ref: reference object; its DOIs come from Reference.doi(ref)
     :param user: unused; kept for signature parity with sibling helpers
     :return: list of fetched article objects
     """
     article_array = []
     fetch = PubMedFetcher()
     for doi in Reference.doi(ref):
         article_array.append(fetch.article_by_doi(doi))
     # BUG FIX: the original returned the loop-local `article` (last item
     # only, NameError when empty) instead of the accumulated list.
     return article_array
Beispiel #7
0
def consultametapub():
    """Flask view: look up the article whose PubMed ID arrives in the request
    JSON body as {"id": ...} and return its title as JSON."""
    fetcher = PubMedFetcher()
    if not request.json:
        abort(400)  # reject requests without a JSON body
    requested_pmid = request.json['id']
    found = fetcher.article_by_pmid(requested_pmid)
    return jsonify(output=found.title)
Beispiel #8
0
    def test_configurable_cachedir(self):
        """ Test that `cachedir` keyword argument is fully supported in modes:

        cachedir == 'default'   <-- assumed working since other tests use this.
        cachedir is None
        cachedir is 'some/path'
        cachedir is '~/path'
        """

        cachedir = TEST_CACHEDIR
        # start with cachedir==None; test that no cachedir is created.
        fetch = PubMedFetcher(cachedir=None)
        assert not os.path.exists(cachedir)

        # An explicit path must be created by the fetcher.
        fetch = PubMedFetcher(cachedir=cachedir)
        assert os.path.exists(cachedir)

        # Clean up: remove the cache DB file first, then the now-empty dir.
        os.unlink(fetch._cache_path)
        os.rmdir(cachedir)

        # '~' must be expanded to the user's home directory.
        fetch = PubMedFetcher(cachedir='~/testcachedir')
        assert os.path.exists(os.path.expanduser('~/testcachedir'))

        os.unlink(fetch._cache_path)
        os.rmdir(os.path.expanduser('~/testcachedir'))
Beispiel #9
0
 def pmids_from_citation(author_last_name=None,
                         year=None,
                         volume=None,
                         first_page=None,
                         journal_title=None):
     """Translate the long-form citation keywords into metapub's abbreviated
     parameter names and return the PMIDs matching that citation."""
     query_fields = {
         'aulast': author_last_name,
         'year': year,
         'volume': volume,
         'first_page': first_page,
         'jtitle': journal_title,
     }
     fetcher = PubMedFetcher()
     return fetcher.pmids_from_citation(**query_fields)
Beispiel #10
0
    def crawl_chem_abstract(self, keyword, retmax=300):
        """Query PubMed for *keyword* and collect, per matching article, its
        chemicals dict augmented with a sanitised title and abstract (commas
        in abstracts become '*'; tabs/newlines become spaces), emitting
        progress through the Qt signals.

        :param keyword: PubMed query string.
        :param retmax: maximum number of PMIDs to fetch (default 300).
        :return: list of chemicals dicts, one per usable article.
        """
        fetch = PubMedFetcher()
        self.progress_bar_value.emit(self.count)

        pmids = fetch.pmids_for_query(keyword, retmax=retmax)

        self.textBrowser_value.emit("Scanning Iteration : " + str(retmax))
        self.textBrowser_value.emit("Expected Running Time : " + str(retmax * 2) + " seconds.")

        self.textBrowser_value.emit("PMID Scan Done!")

        json_dicts = []
        self.textBrowser_value.emit("Crawling Paper Info..")

        for i, pmid in enumerate(pmids):
            try:
                # Advance the progress bar only when the integer percentage grows.
                percent = int(i / len(pmids) * 100)
                if percent > self.count:
                    self.count = percent
                    self.progress_bar_value.emit(self.count)

                try:
                    article = fetch.article_by_pmid(pmid)
                except Exception:  # narrowed from bare except
                    self.textBrowser_value.emit("Error reading " + str(pmid))
                    continue

                chemical = article.chemicals
                if not chemical:
                    continue

                # BUG FIX: check for a missing abstract *before* calling
                # .replace() on it; a None abstract previously raised an
                # AttributeError that the outer except silently swallowed.
                abstract = article.abstract
                if not abstract:
                    continue
                abstract = abstract.replace(",", "*")
                abstract = abstract.replace("\t", " ").replace("\n", " ")

                title = article.title
                if not title:
                    continue
                title = title.replace("\t", " ").replace("\n", " ")

                chemical["title"] = title
                chemical["abstract"] = abstract

                json_dicts.append(chemical)
            except Exception:
                # Best-effort crawl: skip any article that fails to parse.
                continue

        self.textBrowser_value.emit("Progress Done!")
        return json_dicts
Beispiel #11
0
def keyword_query(keywords=sys.argv[1],
                  savepath=sys.argv[2],
                  start_date=None,
                  end_date=None,
                  num_of_articles=1000):
    """Query PubMed for *keywords* and save matching article metadata to CSV.

    **NOTE**: long queries (~1000+ articles) can take > 5 minutes; add
    keywords and date filters to constrain the search space.

    NOTE(review): the ``sys.argv`` defaults are evaluated at import time, so
    importing this module with fewer than three CLI arguments raises
    IndexError.  Kept as-is for interface compatibility.

    :param keywords:        A string or list of keywords to query.
    :param savepath:        Full path of the CSV file to write.
    :param start_date:      Start date string (passed to eutils as ``since``).
    :param end_date:        End date string (passed to eutils as ``until``).
    :param num_of_articles: Maximum number of articles to retrieve.
    :return: pandas DataFrame of the query results, indexed by PMID.
    """
    fetch = PubMedFetcher()

    # Get PMIDs matching the query.
    pmids = fetch.pmids_for_query(query=keywords,
                                  since=start_date,
                                  until=end_date,
                                  retmax=num_of_articles)
    print("Number of PMIDs with search query: " + str(len(pmids)))

    # Fetch per-article metadata, keyed by PMID.  (`pmid` replaces the
    # builtin-shadowing loop variable `id` used previously.)
    abstracts = {}
    for pmid in pmids:
        article = fetch.article_by_pmid(pmid)
        abstracts[pmid] = [
            article.title, article.abstract, article.journal, article.year,
            article.authors
        ]

    # Convert the dictionary to a dataframe.
    df = pd.DataFrame.from_dict(
        abstracts,
        orient='index',
        columns=['Title', 'Abstract', 'Journal', 'Year', 'Authors'])

    # Persist the results.
    df.index.name = 'PMID'
    df.to_csv(savepath)

    return df
Beispiel #12
0
def measure_all_from_query(query):
    """Score the related-abstract similarity for articles matching *query*.

    Only the first 8 PMIDs are considered to bound the workload.
    :return: list of (pmid, similarity_score) tuples.
    """
    fetcher = PubMedFetcher()
    candidate_ids = fetcher.pmids_for_query(query)[:8]

    nlp = spacy.load('en_core_sci_lg')
    return [(pm_id, measure_similarity_abstracts(nlp, pm_id))
            for pm_id in candidate_ids]
class TestPubmedFetcher(unittest.TestCase):
    """Integration tests for PubMedFetcher query methods.

    NOTE(review): these tests hit the live NCBI eutils service; results for
    fixed past years are assumed stable, but network failures or upstream
    data changes will surface as test errors.
    """

    def setUp(self):
        # Fresh fetcher per test, using the default cache directory.
        self.fetch = PubMedFetcher()

    def tearDown(self):
        pass

    def test_pmids_for_query(self):
        """A precise journal/year/author query yields exactly one known PMID;
        a query for a deleted record yields none."""
        params = {'journal': 'PLoS One',
                  'year': 2013,
                  'author': 'McMurry AJ'}

        pmids = self.fetch.pmids_for_query(**params)
        assert len(pmids) == 1
        assert pmids[0] == '23533569'

        # this pubmed ID was deleted
        params = {'TA': 'Journal of Neural Transmission',
                  'pdat': 2014,
                  'vol': 121,
                  'aulast': 'Freitag'
                  }

        pmids = self.fetch.pmids_for_query(**params)
        assert len(pmids) == 0

    def test_medical_genetics_query(self):
        # we presume that the results for a fixed year prior to this one will not change.
        results = self.fetch.pmids_for_medical_genetics_query('Brugada Syndrome', 'diagnosis', debug=True, year=2013)
        assert '24775617' in results

    def test_clinical_query(self):
        # we presume that the results for a fixed year prior to this one will not change.
        results = self.fetch.pmids_for_clinical_query('Global developmental delay', 'etiology', 'narrow', debug=True, year=2013)
        assert results[0] == '24257216'
        assert results[1] == '24123848'
        assert results[2] == '24089199'

    def test_specified_return_slice(self):
        """retmax caps the result count; retstart offsets into the result set."""
        pmids = self.fetch.pmids_for_query(since='2015/3/1', retmax=1000)
        assert len(pmids) == 1000

        pmids = self.fetch.pmids_for_query(since='2015/3/1', retstart=200, retmax=500)
        assert len(pmids) == 500

    def test_pmc_only(self):
        # Smoke test only: prints results, asserts nothing.
        params = {'mesh': 'breast neoplasm'}
        stuff = self.fetch.pmids_for_query(since='2015/1/1', until='2015/3/1', pmc_only=True, **params)
        print(stuff)

    def test_ncbi_book_id_to_pubmed(self):
        """Each NCBI book ID maps to its expected PMID (or to no result when
        the fixture's pmid is None)."""
        for eg in NCBI_BOOKS:
            result = self.fetch.pmids_for_query(eg['book_id'])
            if len(result) > 0:
                assert result[0] == eg['pmid']
            else:
                assert eg['pmid'] is None
                print(eg, result)
class TestPubMedArticle(unittest.TestCase):
    """Tests for PubMedArticle construction from eutils/medline XML and from
    live PMID fetches."""

    def setUp(self):
        self.fetch = PubMedFetcher()

    def tearDown(self):
        pass

    def test_random_efetch(self):
        """Fetching a random PMID in a known range returns an article echoing
        that PMID.  On InvalidPMID the test retries itself recursively.

        NOTE(review): the recursive retry has no depth bound; a long run of
        invalid PMIDs would exhaust the recursion limit.
        """
        pmid = str(random.randint(22222222, 23333333))
        try:
            article = self.fetch.article_by_pmid(pmid)
            if article is not None:
                assert article.pmid == pmid
                assert article.title is not None
        except InvalidPMID:
            # Invalid random PMID is expected occasionally; just try again.
            self.test_random_efetch()

    def test_init1(self):
        """
        Test on the xml returned by eutils
        """
        article = PubMedArticle(xml_str1)
        assert str(article.pmid) == '4'

    def test_init2(self):
        """
        Test on the xml downloaded from medline
        """
        article = PubMedArticle(xml_str2)
        assert str(article.pmid) == '23697015'

    def test_to_dict(self):
        # to_dict() must produce a plain dict representation.
        article = PubMedArticle(xml_str1)
        self.assertTrue(isinstance(article.to_dict(), dict))
Beispiel #15
0
    def pmid_article(ref, user=None):
        """Return article records for every PMID attached to *ref*.

        When *user* has an email address, a PubMedLookup Publication is
        appended per PMID in addition to the metapub article.
        """
        collected = []
        if user and user.email is not None:
            for pmid in Reference.pmid(ref):
                lookup_url = "http://www.ncbi.nlm.nih.gov/pubmed/" + str(pmid)
                lookup = PubMedLookup(lookup_url, user.email)
                collected.append(Publication(lookup))

        fetcher = PubMedFetcher()
        for pmid in Reference.pmid(ref):
            collected.append(fetcher.article_by_pmid(pmid))

        return collected
Beispiel #16
0
    def pmid_article(ref, user=None):
        """Collect article data for each PMID of *ref*.

        With an email-bearing *user*, PubMedLookup Publications are gathered
        first; metapub articles are always appended afterwards.
        """
        gathered = []
        if user and user.email is not None:
            for ref_pmid in Reference.pmid(ref):
                entry_url = "http://www.ncbi.nlm.nih.gov/pubmed/" + str(ref_pmid)
                gathered.append(Publication(PubMedLookup(entry_url, user.email)))

        pubmed = PubMedFetcher()
        for ref_pmid in Reference.pmid(ref):
            gathered.append(pubmed.article_by_pmid(ref_pmid))

        return gathered
Beispiel #17
0
def _pubmed_pmid_to_article(pmid):
    """
    Use NCBI eutils to fetch pubmed article information.

    :param pmid: int or str
    :return: PubMedArticle
    """
    fetcher = PubMedFetcher()
    return fetcher.article_by_pmid(str(pmid))
Beispiel #18
0
def _pubmed_pmid_to_article(pmid):
    """
     Use eutils to fetch pubmed article information.
     TODO: integration with eutils.
    :param pmid: int or str
    :return: PubMedArticle
    """
    fetcher = PubMedFetcher('eutils')
    article = fetcher.article_by_pmid(str(pmid))
    return article
Beispiel #19
0
 def __init__(self, pmid):
     """Build a paper node for *pmid*: fetch its metadata from PubMed, the
     set of papers citing it, and derive an H-index-based weight.

     :param pmid: PubMed ID string.
     """
     self.pmid = pmid
     fetch = PubMedFetcher(email='*****@*****.**')
     article = fetch.article_by_pmid(pmid)
     self.title = article.title
     self.journal = article.journal
     self.authors = article.authors
     # pm_cited: papers that cite the current paper; None when the 'citedin'
     # link set is unavailable for this PMID.
     try:
         self.pm_cited = fetch.related_pmids(pmid)['citedin']
     except Exception:  # narrowed from bare except (KeyError / lookup errors)
         self.pm_cited = None
     self.h_index = self.get_H_index() + 1
     # pm_cite: papers cited BY the current paper (populated elsewhere).
     self.pm_cite = []
     print("create paper with pmid" + pmid)
Beispiel #20
0
def fetch_pubmed(pub_id, id_type = "pmid"):
    """
        Fetches and formats pub data from pubmed.

        :param pub_id: the identifier (DOI string, PMID, or bare PMC number).
        :param id_type: one of 'doi', 'pmid', 'pmc'.
        :return: dict of publication fields, or None when the lookup fails
                 or *id_type* is not recognised.
    """
    pm = PubMedFetcher()
    if id_type == 'doi':
        try:
            result = pm.article_by_doi(pub_id)
        except (AttributeError, MetaPubError, EutilsNCBIError):
            return None
    elif id_type == "pmid":
        try:
            result = pm.article_by_pmid(pub_id)
        except (AttributeError, InvalidPMID, EutilsNCBIError):
            return None
    elif id_type == "pmc":
        try:
            result = pm.article_by_pmcid('PMC' + str(pub_id))
        except (AttributeError, MetaPubError, EutilsNCBIError):
            return None
    else:
        # BUG FIX: an unknown id_type previously fell through and raised
        # NameError on `result` below; fail explicitly instead.
        return None
    result = result.to_dict()

    # Set link using DOI when available.
    if result.get('doi'):
        result['url'] = "http://dx.doi.org/" + result.get('doi')
    else:
        result['url'] = result.get('url')

    # Provide a PDF link when the article is in PubMed Central.
    if result.get('pmc'):
        result['pdf_url'] = f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{result['pmc']}/pdf"

    out = {"pub_title": result.get('title'),
           "pub_authors": result.get('authors'),
           "pub_abstract": result.get('abstract'),
           "pub_doi": result.get('doi'),
           "pub_pmid": result.get('pmid'),
           "pub_pmc": pub_id if id_type == 'pmc' else None,
           "pub_url": result.get('url'),
           "pub_pdf_url": result.get('pdf_url') or 'searching',
           # BUG FIX: use .get() so a record without 'history' cannot raise.
           "pub_date": result.get('history', {}).get('pubmed')}
    return out
Beispiel #21
0
def get_info_by_PMID(PMID: str) -> Dict:
    """Request article metadata for *PMID* via metapub.

    :param PMID: PubMed ID string.
    :return: dict of article metadata tagged with retrieval provenance
             (provenance fields only when the lookup fails).
    """
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_pmid(PMID)
        # Copy every public attribute into the dict.  getattr() replaces the
        # original eval('article.' + info), which executed arbitrary strings.
        for info in dir(article):
            if not info.startswith('_'):
                article_dict[info] = getattr(article, info)
    except MetaPubError:
        pass
    # Add data retrieval provenance and return.
    article_dict = add_retrieval_information(article_dict, 'MetaPub', 'PMID',
                                             PMID)
    return article_dict
Beispiel #22
0
def _pubmed_central_pmcid_to_article(pmcid):
    """
    Specific to PMC PubMed Central.
     Use eutils to fetch pubmed article information.
     TODO: integration with eutils.
    :param pmcid:
    :return: PubMedArticle
    """
    fetcher = PubMedFetcher('eutils')
    return fetcher.article_by_pmcid(str(pmcid))
Beispiel #23
0
def _pubmed_central_pmcid_to_article(pmcid):
    """
    Specific to PMC PubMed Central.
    Use NCBI eutils to fetch pubmed article information.

    :param pmcid:
    :return: PubMedArticle
    """
    fetcher = PubMedFetcher()
    article = fetcher.article_by_pmcid(str(pmcid))
    return article
def paper2dict(pmid, doi):
    """Fetch one paper by PMID or DOI and print its dict form (sync helper
    for the prophet database).

    NOTE(review): `input_file` is not defined in this function's scope —
    unless it is a module-level global (e.g. a CLI option bound elsewhere),
    the check below raises NameError.  Also `fetch` is created but never
    used; `fetch_paper` does the actual lookup.  Both need confirming
    against the surrounding module.
    """
    # For single paper
    from metapub import PubMedFetcher
    fetch = PubMedFetcher()
    if not (input_file or pmid or doi):
        print(bcolors.FAIL)
        print("You need to specified -i/-p/-d.", bcolors.ENDC)
        sys.exit(1)
    else:
        article = fetch_paper(pmid=pmid, doi=doi)
        print(obj2dict(article))
Beispiel #25
0
    def downloadAbstract(self, keywords, file_name, max_return=1e+6):
        """Download the title and abstract of every PubMed hit for *keywords*
        and write them to *file_name* as a <corpus> XML document.

        :param keywords: PubMed query string (also stored in the XML).
        :param file_name: output XML path.
        :param max_return: cap on the number of PMIDs requested.
        """
        fetcher = PubMedFetcher(cachedir=self.cache_dir, api_key=self.api_key)
        pmids = fetcher.pmids_for_query(keywords, retmax=max_return)

        corpus = ET.Element('corpus')
        keywords_item = ET.SubElement(corpus, 'keywords')
        keywords_item.text = keywords

        for pmid in pmids:
            print(pmid)
            # (Removed a redundant fetcher._eutils_article_by_pmid(pmid) call
            # whose result was discarded; article_by_pmid performs the fetch.)
            doc = fetcher.article_by_pmid(pmid)
            title_str = self.removeHtmlTags(doc.title)
            abstract_str = self.removeHtmlTags(doc.abstract)

            if abstract_str == '':
                continue  # skip articles without an abstract

            doc_item = ET.SubElement(corpus, 'article')
            doc_item.set('id', pmid)

            title_item = ET.SubElement(doc_item, 'title')
            title_item.text = title_str

            abstract_item = ET.SubElement(doc_item, 'abstract')
            abstract_item.text = abstract_str

        # BUG FIX: write through a context manager so the output file is
        # always flushed and closed (the original never closed it).
        with open(file_name, 'wb') as xml_file:
            xml_file.write(ET.tostring(corpus))
def get_reference_from_pmid_by_metapub(pmid:str)->dict:
    """Look up *pmid* via metapub and return a reference dict, or None when
    the lookup fails.

    Sleeps ~0.34 s before each request to stay under NCBI's 3-requests-per-
    second rate limit.

    :param pmid: PubMed ID string.
    :return: dict with journal/authors/pages/abstract/title/doi/pmid fields,
             or None on failure.
    """
    fetch = PubMedFetcher(cachedir=cache)
    reference = None
    try:
        time.sleep(0.34)
        article = fetch.article_by_pmid(pmid)
        reference = {'journal':article.journal,
                     'authors': article.authors,
                     'issue':article.issue,
                     'first_page':article.first_page,
                     'last_page': article.last_page,
                     'volume':article.volume,
                     'year': str(article.year),
                     'abstract': replace_characters(article.abstract),
                     'title': replace_characters(article.title),
                     'doi': article.doi,
                     'pmid': article.pmid
                     }
    except Exception:  # narrowed from bare except; keep best-effort contract
        print('*** Bad PMID:',pmid)

    return reference
Beispiel #27
0
def filter_results(results, words_in_tilte, limit):
    """Filter PubMed result links, keeping papers whose title contains at
    least one word from EVERY word-group in *words_in_tilte*, up to *limit*.

    :param results: iterable of PubMed URL strings ending in a PMID.
    :param words_in_tilte: list of word-groups (each a list of strings).
    :param limit: maximum number of papers to keep.
    :return: the filtered subset of *results*.
    """
    fetch = PubMedFetcher(email='*****@*****.**')
    filtered_results = []
    counter = 0
    for paper in results:
        pmid = paper.split('/')[-1].split('\n')[0]
        article = fetch.article_by_pmid(pmid)
        # BUG FIX: initialise `include` so an empty words_in_tilte no longer
        # raises NameError (every paper is then accepted).
        include = True
        for words in words_in_tilte:
            include = False
            for word in words:
                if word.strip().lower() in article.title.lower():
                    include = True
                    break  # one match in this group suffices (was a no-op `continue`)
            if not include:
                break  # a group with no match disqualifies the paper
        if include:
            filtered_results.append(paper)
            counter += 1
        if counter == limit:
            return filtered_results

    return filtered_results
Beispiel #28
0
    def crawl_chem_json(self, keyword, retmax=300):
        """Query PubMed for *keyword* and collect the chemicals dict of each
        matching article, reporting progress through the Qt signals.

        :param keyword: PubMed query string.
        :param retmax: maximum number of PMIDs to fetch (default 300).
        :return: list of chemicals dicts.
        """
        fetch = PubMedFetcher()

        pmids = fetch.pmids_for_query(keyword, retmax=retmax)

        self.textBrowser_value.emit("Scanning Iteration : " + str(retmax))
        self.textBrowser_value.emit("Expected Running Time : " + str(retmax * 2) + " seconds.")

        self.textBrowser_value.emit("PMID Scan Done!")
        self.progress_bar_value.emit(self.count)

        json_dicts = []
        self.textBrowser_value.emit("Crawling Paper Info..")

        for i, pmid in enumerate(pmids):
            try:
                # Advance the progress bar only when the integer percentage grows.
                percent = int(i / len(pmids) * 100)
                if percent > self.count:
                    self.count = percent
                    self.progress_bar_value.emit(self.count)
                try:
                    article = fetch.article_by_pmid(pmid)
                except Exception:  # narrowed from bare except
                    self.textBrowser_value.emit("Error reading " + str(pmid))
                    continue

                chemical = article.chemicals
                if not chemical:
                    continue  # skip articles with no chemicals metadata

                json_dicts.append(chemical)
            except Exception:
                # Best-effort crawl: skip any article that fails to parse.
                continue

        self.textBrowser_value.emit("Progress Done!")
        return json_dicts
Beispiel #29
0
def search(request):
    """Django view: run a PubMed keyword search for ?q=..., ensure a
    Publication row exists per hit, and render the results page.

    Also reports whether this exact query string was previously saved
    (SearchStash) and which of its PMIDs were captured.
    """
    ctx = {
        'query_saved' : None,
        'saved_pmids':[],
        'total_saved_queries':SearchStash.objects.filter(user=request.user).count(),
    }
    f = PubMedFetcher()
    initial = {}
    query_saved = None
    try:
        query_saved = SearchStash.objects.get(search_used=request.GET.get('q'))
    except: pass
    # `else` runs only when the stash lookup above succeeded (no exception).
    else:
        ctx['saved_pmids'] = [pub.pmid for pub in query_saved.pmids.all()]
        ctx['query_saved'] = query_saved

    # Unsaved query: nudge the user to save it before publications are captured.
    if not ctx['query_saved'] and request.GET.get('q'):
        messages.add_message(request, messages.INFO, '<strong>Note:</strong> You must click "Save Query" above to start capturing publications for this query.')
    if request.GET.get('q', None):
        keywords = request.GET.get('q', None)
        initial['q'] = request.GET.get('q')
        # Cap the PubMed lookup at 100 hits per search.
        pmids = f.pmids_for_query(query=keywords, retmax=100)
        pmid_list = []
        for pmid in pmids:
            # Ensure a Publication row exists for each hit; [0] is the object.
            new_pmid = Publication.objects.get_or_create(pmid=pmid)[0]
            row = {
                'pmid': new_pmid.pmid,
            }
            pmid_list.append(row)
        ctx['keywords'] = keywords
        ctx['pmids'] = pmids
        ctx['pmid_list'] = pmid_list
        ctx['result_count'] = len(pmids)
    form = PubMedForm(initial=initial)
    ctx['form'] = form
    return render(request, 'lum/search.html', ctx)
Beispiel #30
0
def measure_similarity_abstracts(nlp, pmid):
    """Mean spaCy similarity between the abstract of *pmid* and the abstracts
    of up to 8 related PubMed articles scraped from the PubMed website.

    :param nlp: loaded spaCy language pipeline.
    :param pmid: PubMed ID whose abstract is the reference document.
    :return: mean pairwise similarity score.

    NOTE(review): `mean(scores)` raises StatisticsError when no related
    abstract could be scraped; callers should be prepared for that.
    """
    def scrape_related_abstracts(pm_id):
        # Scrape abstracts of related articles from pubmed.ncbi.nlm.nih.gov.
        related_ids = scrape_related_ids(pm_id)

        # Cap the workload at 8 related articles.
        if len(related_ids) > 8:
            related_ids = related_ids[:8]

        abstracts = []

        for related in related_ids:
            starter = 'https://pubmed.ncbi.nlm.nih.gov/'
            link = starter + related

            data = requests.get(link).text
            soup = BeautifulSoup(data, 'html.parser')
            abstract_header = soup.find('div', {'id': 'en-abstract'})
            try:
                abstract = str(abstract_header.p.string).strip()
                abstracts.append(abstract)
            except:
                # abstract_header may be None or lack a <p>; skip quietly.
                pass

        return abstracts

    # Reference abstract fetched via metapub.
    fetch = PubMedFetcher()
    exemplary = fetch.article_by_pmid(pmid).abstract

    doc1 = nlp(exemplary)

    scores = []

    for abstract in scrape_related_abstracts(pmid):
        doc2 = nlp(abstract)
        scores.append(doc1.similarity(doc2))

    return mean(scores)
Beispiel #31
0
def crawl_chem_json(keyword, retmax=1000):
    """Query PubMed for *keyword* and return the chemicals dict of every
    matching article that has one.

    :param keyword: PubMed query string.
    :param retmax: maximum number of PMIDs to fetch (default 1000).
    :return: list of chemicals dicts.
    """
    fetch = PubMedFetcher()

    pmids = fetch.pmids_for_query(keyword, retmax=retmax)
    print("PMID scan Done!")

    json_dicts = []
    print("Crawling Paper Info..")

    for pmid in tqdm(pmids):
        try:
            article = fetch.article_by_pmid(pmid)
        except Exception:  # narrowed from bare except; skip unreadable PMIDs
            print("Error reading " + str(pmid))
            continue

        chemical = article.chemicals
        if not chemical:
            continue  # skip articles with no chemicals metadata

        json_dicts.append(chemical)

    print("Process Done!")
    return json_dicts
Beispiel #32
0
    def test_article_by_pmid(self):
        """Round-trip check: fetching by PMID yields an article reporting the
        same PMID, for both a tiny and a recent ID."""
        for pmid in ('4', '25763451'):
            fetch = PubMedFetcher()
            article = fetch.article_by_pmid(pmid)
            assert str(article.pmid) == pmid
Beispiel #33
0
def search(entry):
    """Resolve *entry* (a dict that may carry 'pmid', 'pmcid', 'doi',
    'author', 'journal', 'year' and 'volume' keys) to a metapub article.

    Identifiers are tried from most to least direct; a citation lookup is
    the last resort.  Returns the fetched article, or None when every
    lookup fails.
    """
    fetch = PubMedFetcher()
    # Each attempt may fail because the key is missing from *entry*
    # (KeyError) or because the fetch itself errors; either way we fall
    # through to the next strategy.  `except Exception` replaces the
    # original bare excepts so KeyboardInterrupt/SystemExit still escape.
    lookups = (
        lambda: fetch.article_by_pmid(entry['pmid']),
        lambda: fetch.article_by_pmcid(entry['pmcid']),
        lambda: fetch.article_by_doi(entry['doi']),
        lambda: fetch.article_by_pmid(
            fetch.pmids_for_citation(authors=entry['author'],
                                     journal=entry['journal'],
                                     year=entry['year'],
                                     volume=entry['volume'])[0]),
    )
    for lookup in lookups:
        try:
            return lookup()
        except Exception:
            continue
    return None
Beispiel #34
0
 def search(source = "PubMed", level = "basic", db = "PubMed", query = None, unlabeled_string = None, affiliation = None, article_identifier = None, all_fields = None, author = None, author_identifier = None, book = None, corporate_author = None, create_date = None, completion_date = None, conflict_of_interest = None, ec_rn_number = None, editor = None, entrez_date = None, filter_citations = None, first_author_name = None, full_author_name = None, full_investigator_name = None, grant_number = None, investigator = None, isbn = None, issue = None, journal = None, language = None, last_author = None, location_id = None, mesh_date = None, mesh_major_topic = None, mesh_subheadings = None, mesh_terms = None, modification_date = None, nlm_unique_id = None, other_term = None, owner = None, pagination = None, personal_name_as_subject = None, pharmacological_action = None, place_of_publication = None, pmid = None, publisher = None, publication_date = None, publication_type = None, retmax = None, retmode = None, secondary_source_id = None, sort = None, subset = None, supplementary_concept = None, text_words = None, title = None, title_abstract = None, transliterated_title = None, uid = None, volume = None, raw = False, exact = False, user = None):
     """Dispatch a literature search to the selected backend.
     
     source -- "PubMed" (default), "Google"/"Google Scholar", or
               "OpenLibrary"
     level  -- for PubMed only: "complex" forwards every field argument to
               eutils_search; "basic" runs a plain query string through
               PubMedFetcher and returns Reference objects
     query / unlabeled_string -- completed PubMed query text for the basic
               path (unlabeled_string wins when both are given)
     
     Returns a list of Reference objects on the PubMed basic path, whatever
     the delegated backend returns otherwise, or None for unmatched input.
     """
     if source.lower() in ["pubmed"] and level.lower() == "complex":
         
         return eutils_search(db = db, retmode = retmode, retmax = retmax, sort = sort, unlabeled_string = unlabeled_string, affiliation = affiliation, article_identifier = article_identifier, all_fields = all_fields, author = author, author_identifier = author_identifier, book = book, corporate_author = corporate_author, create_date = create_date, completion_date = completion_date, conflict_of_interest = conflict_of_interest, ec_rn_number = ec_rn_number, editor = editor, entrez_date = entrez_date, filter_citations = filter_citations, first_author_name = first_author_name, full_author_name = full_author_name, full_investigator_name = full_investigator_name, grant_number = grant_number, investigator = investigator, isbn = isbn, issue = issue, journal = journal, language = language, last_author = last_author, location_id = location_id, mesh_date = mesh_date, mesh_major_topic = mesh_major_topic, mesh_subheadings = mesh_subheadings, mesh_terms = mesh_terms, modification_date = modification_date, nlm_unique_id = nlm_unique_id, other_term = other_term, owner = owner, pagination = pagination, personal_name_as_subject = personal_name_as_subject, pharmacological_action = pharmacological_action, place_of_publication = place_of_publication, pmid = pmid, publisher = publisher, publication_date = publication_date, publication_type = publication_type, secondary_source_id = secondary_source_id, subset = subset, supplementary_concept = supplementary_concept, text_words = text_words, title = title, title_abstract = title_abstract, transliterated_title = transliterated_title, uid = uid, volume = volume, raw = raw, exact = exact)
     
     elif source.lower() in ["pubmed"] and level.lower() == "basic":
         
         # Use 'unlabeled_string' or 'query' here.  Both already take
         # completed PubMed queries as strings (with various connectors
         # and constructors).
         if unlabeled_string:
             
             fetch = PubMedFetcher()
             pubmed_id_list = fetch.pmids_for_query(unlabeled_string)
             ref_list = []
             for pubmed_id in pubmed_id_list:
                 article = fetch.article_by_pmid(pubmed_id) # Need a faster way to get titles...
                 temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed", name = article.title)
                 ref_list.append(temp_ref)
             return ref_list
         elif query:
             
             # This is where the basic reference search redirects for now,
             # but it is relatively slow.
             fetch = PubMedFetcher()
             pubmed_id_list = fetch.pmids_for_query(query)
             ref_list = []
             for pubmed_id in pubmed_id_list:
                 try:
                     article = fetch.article_by_pmid(pubmed_id) # Need a faster way to get titles...
                     temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed", name = article.title)
                     ref_list.append(temp_ref)
                 except metapub.exceptions.InvalidPMID:
                     print("An invalid PMID error occurred.")
                     temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed")
                     ref_list.append(temp_ref)
                 # BUG FIX: an `else:` clause here used to append a second,
                 # title-less Reference after every successful fetch,
                 # duplicating each result; it has been removed.
             return ref_list
     
     elif source.lower() in ["google", "google scholar"]:
         return google_scholar_search(unlabeled_string)
     
     elif source.lower() in ["openlibrary"]:
         return openlibrary_search(unlabeled_string)
Beispiel #35
0
 def __init__(self, cache_file_name, hIndex_filename, no_index_filename,
              label_filename, label_ratio_thresh, output_filename, workers,
              rsp):
     """Build the paper network and enrich every labelled paper with its
     journal h-index and publication year.

     cache_file_name    -- backing store path handed to PaperCache
     hIndex_filename    -- journal h-index data handed to HIndex
     no_index_filename  -- passed to PaperBuilder (journals lacking an index
                           — presumably; confirm against PaperBuilder)
     label_filename     -- labelled-paper input loaded by label_data*()
     label_ratio_thresh -- stored threshold for later labelling decisions
     output_filename    -- stored path for later output
     workers            -- truthy: load labels via the worker variant
     rsp                -- stored as-is; semantics defined by the caller
     """
     self.paper_info = {}
     self.workers = workers
     self.label_filename = label_filename
     # label_data*() is expected to populate self.paper_info keyed by PMID.
     # NOTE(review): inferred from the enrichment loop below — confirm
     # against label_data's definition.
     if workers:
         self.label_data_with_workers(label_filename)
     else:
         self.label_data(label_filename)
     self.paper_cache = PaperCache(cache_file_name)
     self.hIndex = HIndex(hIndex_filename)
     self.label_ratio_thresh = label_ratio_thresh
     self.output_filename = output_filename
     self.fetcher = PubMedFetcher(email='*****@*****.**')
     paper_builder = PaperBuilder(self.hIndex, self.paper_cache,
                                  self.fetcher, no_index_filename)
     self.papers_network = P_N(list(self.paper_info.keys()), paper_builder)
     # Copy the h-index and year resolved by the network back onto the
     # per-paper records loaded above.
     for pmid, p in self.papers_network.csv_papers_dict.items():
         self.paper_info[pmid]['Journal hIndex'] = p.h_index
         self.paper_info[pmid]['year'] = p.year
     self.rsp = rsp
Beispiel #36
0
    def handle(self, *args, **options):
        """Import publications from pmids.csv.

        Skips the header row and rows whose PMID already has a Publication,
        stops after 3000 rows, creates a Publication per remaining row,
        back-fills a missing PMID via a DOI lookup, then attaches CIS tags,
        authors and labs.
        """
        csvfile = "pmids.csv"
        with open(csvfile, 'rU') as f:
            reader = csv.reader(f)

            count = 0
            for row in reader:
                pmid = self.get_pmid(row)

                if count == 0:
                    count += 1
                    continue #skip first header row
                if Publication.objects.filter(pmid=pmid).count() > 0:
                    count += 1
                    continue
                if count == 3000: break

                # The positional unpacking below documents the spreadsheet
                # layout.  Most columns are intentionally unused; the reads
                # are kept so short rows still fail fast with IndexError.
                ref_id = row[0]
                ref_type = row[1]

                year = row[3]
                article_title = row[4]
                secondary_author = row[5]
                journal_title = row[6]
                place_published = row[7]
                publisher = row[8]
                volume = row[9]
                issue = row[10]
                pages = row[11]
                date = row[12] #bad data in csv, don't use...
                alt_journal = row[13]
                doi = self.get_doi(row)
                print(doi)  # parenthesized form works in Python 2 and 3
                #pmid_from_ref = row[15]
                #pmid_from_updates = row[16]

                abstract = row[17]

                url = row[18]
                file_attachments = row[19]
                author_address_from_pubmed = row[20] #empty column
                figure = row[21]
                cis_acc = row[22]
                access_date = row[23]
                luminex_product = row[24]
                db_name = row[25]
                db_provider = row[26]
                language = row[27]
                reprint_author_name = row[28]
                blank = row[29]
                reprint_author_email = row[30]

                ecopy = row[39]
                paper_type = row[40]
                species = row[41]
                assay = row[42]
                sample_type = row[43] #this is the article title...
                whos_kit = row[44]
                misc = row[45]
                application = row[46]
                market_segment = row[47]
                subsidiary_author = row[48]
                custom_6 = row[49]

                issn = row[51]

                pub = Publication(
                    title=sample_type,
                    pmid=pmid,
                    doi=doi,
                    abstract=abstract,
                )
                fresh_data = None
                if len(pmid) < 1:
                    # No PMID in the sheet: try to recover one via the DOI.
                    fetch = PubMedFetcher()
                    try:
                        fresh_data = fetch.article_by_doi(doi)
                        fresh_data = fresh_data.to_dict()
                    except Exception:
                        # Best-effort lookup; leave pmid empty on any failure.
                        # (A bare except here also trapped KeyboardInterrupt.)
                        pass
                    else:
                        pub.pmid = fresh_data['pmid']

                pub.save()
                self.add_cis_tags(pub, row)
                authors = self.get_authors(row)
                for author in authors:
                    pub.authors.add(author)
                lab = self.get_lab(row)

                if lab:
                    for author in authors:
                        author.labs.add(lab)
                    pub.labs.add(lab)

                count += 1
Beispiel #37
0
 def pmids_from_citation(author_last_name = None, year = None, volume = None, first_page = None, journal_title = None):
     """Look up PMIDs matching a loose citation (author surname, year,
     volume, first page, journal title) via metapub's citation matcher.

     Returns the list of candidate PMID strings from PubMed.
     """
     fetch = PubMedFetcher()
     # metapub's fetcher method is pmids_for_citation (the original called a
     # nonexistent pmids_from_citation); keyword names follow its API, as
     # also used by the search(entry) helper earlier in this file.
     return fetch.pmids_for_citation(aulast = author_last_name, year = year, volume = volume, first_page = first_page, jtitle = journal_title)
Beispiel #38
0
def psearch(pmid):
    fetch = PubMedFetcher()
    ret = fetch.article_by_pmid(pmid)
    print ret.to_dict()