コード例 #1
0
ファイル: google-books.py プロジェクト: mcraveiro/referencer
def resolve_metadata(doc, method):
	"""Canonicalise the document's Google Books URL and import its BibTeX.

	Returns False when the stored URL does not match the Google Books
	pattern; True after the BibTeX metadata has been parsed into doc.
	"""
	url = doc.get_field("url")

	res = re.match(pattern, url)
	if res is None:
		# Not a recognised Google Books URL -- bail out gracefully instead
		# of crashing with AttributeError on res.group() below.
		return False

	url = r'https://books.google.%s/books?id=%s' % (res.group(1), res.group(2).split("&")[0])
	doc.set_field("url", url)

	# NOTE(review): the result of this page download is unused; presumably
	# kept for its side effects (progress UI) -- confirm before removing.
	data = referencer.download("Reading Google Books web page", "Parsing the content of the Google Books page...", url)

	bib = referencer.download("Fetching BiBTeX data", "Downloading BiBTeX metadata for the book...", url + "&output=bibtex")
	doc.parse_bibtex(bib)

	return True
コード例 #2
0
ファイル: pubmed.py プロジェクト: maximejay/Gnome-Referencer
def get_citation_from_doi(query,
                          email='*****@*****.**',
                          tool='Referencer',
                          database='pubmed'):
    params = {
        'db': database,
        'tool': tool,
        'email': email,
        'term': query + "[doi]",
        'usehistory': 'y',
        'retmax': 1
    }

    # try to resolve the PubMed ID of the DOI
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' + urllib.urlencode(
        params)
    data = referencer.download(_("Resolving DOI"),
                               _("Finding PubMed ID from DOI %s") % query, url)

    # parse XML output from PubMed...
    xmldoc = minidom.parseString(data)
    ids = xmldoc.getElementsByTagName('Id')

    # nothing found, exit
    if len(ids) == 0:
        raise "pubmed.get_citation_from_doi: DOI not found"

    # get ID
    id = ids[0].childNodes[0].data

    print "pubmed.get_citation_from_doi: DOI ", query, " has PubMed ID ", id

    return get_citation_from_pmid(id)
コード例 #3
0
ファイル: pubmed.py プロジェクト: mcraveiro/referencer
def get_citation_from_doi(query, email='*****@*****.**', tool='Referencer', database='pubmed'):
	params = {
		'db':database,
		'tool':tool,
		'email':email,
		'term':query + "[doi]",
		'usehistory':'y',
		'retmax':1
	}

	# try to resolve the PubMed ID of the DOI
	url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' + urllib.urlencode(params)
	data = referencer.download (_("Resolving DOI"), _("Finding PubMed ID from DOI %s") % query , url);

	# parse XML output from PubMed...
	xmldoc = minidom.parseString(data)
	ids = xmldoc.getElementsByTagName('Id')

	# nothing found, exit
	if len(ids) == 0:
		raise "pubmed.get_citation_from_doi: DOI not found"

	# get ID
	id = ids[0].childNodes[0].data

	print "pubmed.get_citation_from_doi: DOI ", query, " has PubMed ID ", id

	return get_citation_from_pmid (id)
コード例 #4
0
ファイル: ads.py プロジェクト: egroeper/referencer
def resolve_metadata (doc, method):
	if method != "doi":
		return False

	doi = doc.get_field("doi")
	params = {
		'data_type':"XML",
		'doi':doi
	}

	url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?" + urllib.urlencode (params)
	data = referencer.download (_("Resolving DOI"), _("Fetching metadata from NASA ADS for DOI %s") % doi, url);

	if data.find ("retrieved=\"1\"") == -1:
		print "Couldn't get info from ADS"
		return False

	fields = []
	try:
		xmldoc = minidom.parseString (data)
		fields.append (["journal", get_field(xmldoc, "journal")])
		fields.append (["title",   get_field(xmldoc, "title")])
		fields.append (["volume",  get_field(xmldoc, "volume")])

		authors = xmldoc.getElementsByTagName('author')
		authorString = ""
		first = True
		for author in authors:
			name = author.childNodes[0].data.encode("utf-8")
			if (first == False):
				authorString += " and "
			print "got author", name
			authorString += name
			first = False

		fields.append (["author", authorString])

		print "appended authors"
		pages = get_field (xmldoc, "page")
		print "getting lastPage"
		lastPage = get_field (xmldoc, "lastpage")
		if (len(lastPage) > 0):
			pages += "-"
			pages += lastPage

		print "got pages " , pages
		fields.append (["page", pages])
		print "appended pages"
	except:
		print "exception"
		return False

	for field in fields:
		if len(field[1]) > 0:
			doc.set_field(field[0], field[1]) 

	# TODO: parse pubdata element for "Jul 1989" (month and year fields)

	return True
コード例 #5
0
def resolve_metadata(doc, method):
    """Canonicalise the document's Google Books URL and import its BibTeX.

    Returns False when the stored URL does not match the Google Books
    pattern; True after the BibTeX metadata has been parsed into doc.
    """
    url = doc.get_field("url")

    res = re.match(pattern, url)
    if res is None:
        # Not a recognised Google Books URL -- bail out gracefully instead
        # of crashing with AttributeError on res.group() below.
        return False

    url = r'https://books.google.%s/books?id=%s' % (res.group(1),
                                                    res.group(2).split("&")[0])
    doc.set_field("url", url)

    # NOTE(review): the result of this page download is unused; presumably
    # kept for its side effects (progress UI) -- confirm before removing.
    data = referencer.download(
        "Reading Google Books web page",
        "Parsing the content of the Google Books page...", url)

    bib = referencer.download("Fetching BiBTeX data",
                              "Downloading BiBTeX metadata for the book...",
                              url + "&output=bibtex")
    doc.parse_bibtex(bib)

    return True
コード例 #6
0
ファイル: pubmed.py プロジェクト: mcraveiro/referencer
def get_citation_from_pmid (pmid, email='*****@*****.**', tool='Referencer', database='pubmed'):
	"""Download the XML citation record for a PubMed ID from NCBI efetch."""
	query_string = urllib.urlencode({
		'db': database,
		'tool': tool,
		'email': email,
		'id': pmid,
		'retmode': 'xml'
	})

	# get citation info:
	fetch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' + query_string
	return referencer.download(_("Resolving PubMed ID"),
	                           _("Fetching metadata from NCBI for PubMed ID %s") % pmid,
	                           fetch_url)
コード例 #7
0
ファイル: isi_plugin.py プロジェクト: cmthompson/weiss
def get_number_of_records (document):

    title = document.get_field("title")
    year = document.get_field ("year")
    author= document.get_field ("author")

    url0='http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS&rspType=xml&method=search&firstRec=1&numRecs=1'
    url0+= '&query='+get_query(document) 
    data0 = referencer.download(
        _("Obtaining data from ISI-WebOfScience"), 
        _("Fetching number of ocurrences for %s/%s/%s") % (author,title,year), 
        url0)
    print data0
    xmldoc0 = minidom.parseString(data0)
    recordsFound=get_field(xmldoc0,"recordsFound")
    return int(recordsFound)
コード例 #8
0
ファイル: isi_plugin.py プロジェクト: cmthompson/weiss
    def get_data(self,document, firstrec=None, numrecs=None):
        """Fetch up to ``numrecs`` records starting at ``firstrec`` (both
        default to 1) from ISI Web of Science for this document's query."""
        title = document.get_field("title")
        year = document.get_field("year")
        author = document.get_field("author")

        first = 1 if firstrec is None else firstrec
        count = 1 if numrecs is None else numrecs

        request_url = ('http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS'
                       '&SID=Q1mNFhCECOk6c8aELLh&rspType=xml&method=searchRetrieve'
                       '&firstRec=' + str(first) +
                       '&numRecs=' + str(count) +
                       '&query=' + get_query(document))
        return referencer.download(_("Obtaining data from ISI-WebOfScience"),
                                   _("Fetching data for %s/%s/%s")
                                   % (author, title, year), request_url)
コード例 #9
0
def resolve_metadata(doc, method):
    """Canonicalise the document's Google Books URL, import its BibTeX and
    mark the document as a book.

    Returns False when the stored URL does not match the Google Books
    pattern; True on success.
    """
    url = doc.get_field("url")

    res = re.match(pattern, url)
    if res is None:
        # Not a recognised Google Books URL -- bail out gracefully instead
        # of crashing with AttributeError on res.group() below.
        return False

    url = r'http://books.google.%s/books?id=%s' % (res.group(1), res.group(2))
    doc.set_field("url", url)

    bibtex_url = r'http://books.google.%s/books?id=%s&output=bibtex' % (
        res.group(1), res.group(2))
    #print "url: ", repr(bibtex_url)
    bib = referencer.download("Fetching BiBTeX data",
                              "Downloading BiBTeX metadata for the book...",
                              bibtex_url)
    #print "bib:", repr(bib)
    doc.parse_bibtex(bib)

    doc.set_type("book")
    return True
コード例 #10
0
def do_search(document):
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    url0 = 'http://estipub.isiknowledge.com/esti/cgi?action=search&viewType=xml&mode=GeneralSearch&product=WOS&ServiceName=GeneralSearch&filter=&Start=&End=%d&DestApp=WOS' % (
        get_MAXRECORDS())
    url0 += "&" + get_query(document)
    print "isi query url:", url0
    if False:  #debugging
        #data0 = open("plugins/isi-plugin-testdata.txt").read()
        data0 = open("plugins/isi-plugin-testdata2.txt").read()
    else:
        data0 = referencer.download(
            _("Obtaining data from ISI-WebOfScience"),
            _("Querying for %s/%s/%s") % (author, title, year), url0)
    print data0
    xmldoc0 = minidom.parseString(data0)
    return xmldoc0
コード例 #11
0
ファイル: isi-plugin.py プロジェクト: mcraveiro/referencer
def do_search (document):
    title = document.get_field("title")
    year = document.get_field ("year")
    author= document.get_field ("author")

    url0='http://estipub.isiknowledge.com/esti/cgi?action=search&viewType=xml&mode=GeneralSearch&product=WOS&ServiceName=GeneralSearch&filter=&Start=&End=%d&DestApp=WOS' % (get_MAXRECORDS())
    url0+= "&" + get_query(document) 
    print "isi query url:", url0
    if False: #debugging
        #data0 = open("plugins/isi-plugin-testdata.txt").read()
        data0 = open("plugins/isi-plugin-testdata2.txt").read()
    else:
        data0 = referencer.download(
            _("Obtaining data from ISI-WebOfScience"), 
            _("Querying for %s/%s/%s") % (author,title,year), 
            url0)
    print data0
    xmldoc0 = minidom.parseString(data0)
    return xmldoc0
コード例 #12
0
def resolve_metadata(doc, method=None):
    # try with title, otherwise try with author + year
    title = doc.get_field("title")
    if title:
        searchTerms = [title]
    else:
        searchTerms = [get_first_author(doc.get_field("author"))]
        searchTerms += [doc.get_field("year")]

    searchTerm = " ".join(searchTerms)
    for c in "(),.{}!\"':=#%$/&[]+":
        searchTerm = searchTerm.replace(c, "")
    searchTerm = searchTerm.replace("-", " ")
    while searchTerm.find("  ") > 0:  #remove double spaces
        searchTerm = searchTerm.replace("  ", " ")

    #print "DBLP:searchTerm:", repr(searchTerm)

    url = "http://www.dblp.org/search/api/?%s&h=1000&c=0&f=0&format=xml" % (
        urllib.urlencode({'q': searchTerm}))
    print "DBLP:url:", repr(url)
    data = referencer.download(
        _("Searching DBLP"),
        _("Fetching metadata from DBLP for search query '%s'") % searchTerm,
        url)

    if not data:
        return False

    hits = parse_hits_get_urls(data)

    print "DBLP:hits:", hits
    if len(hits) != 1:
        #XXX, display UI?
        print "DBLP: Not exactly one hit, giving up"
        return False

    bibtex_xml = get_bibtex_xml_from_url(hits[0])
    #print bibtex_xml
    bibtex = bibtex_xml_to_bibtex(bibtex_xml)
    #print bibtex
    doc.parse_bibtex(bibtex)
    return True
コード例 #13
0
ファイル: pubmed.py プロジェクト: maximejay/Gnome-Referencer
def get_citation_from_pmid(pmid,
                           email='*****@*****.**',
                           tool='Referencer',
                           database='pubmed'):
    """Download the XML citation record for a PubMed ID from NCBI efetch."""
    query_string = urllib.urlencode({
        'db': database,
        'tool': tool,
        'email': email,
        'id': pmid,
        'retmode': 'xml'
    })

    # get citation info:
    fetch_url = ('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
                 + query_string)
    return referencer.download(
        _("Resolving PubMed ID"),
        _("Fetching metadata from NCBI for PubMed ID %s") % pmid, fetch_url)
コード例 #14
0
ファイル: dblp.py プロジェクト: mcraveiro/referencer
def resolve_metadata (doc, method=None):
    # try with title, otherwise try with author + year
    title = doc.get_field("title")
    if title:
        searchTerms = [title]
    else:
        searchTerms = [get_first_author(doc.get_field("author"))]
        searchTerms += [doc.get_field("year")]

    searchTerm = " ".join(searchTerms)
    for c in "(),.{}!\"':=#%$/&[]+":
        searchTerm = searchTerm.replace(c, "")
    searchTerm = searchTerm.replace("-", " ")
    while searchTerm.find("  ") > 0: #remove double spaces
        searchTerm = searchTerm.replace("  ", " ")
    
    #print "DBLP:searchTerm:", repr(searchTerm)

    url = "http://www.dblp.org/search/api/?%s&h=1000&c=0&f=0&format=xml" % (urllib.urlencode({'q': searchTerm}))
    print "DBLP:url:", repr(url)
    data = referencer.download (_("Searching DBLP"), _("Fetching metadata from DBLP for search query '%s'") % searchTerm, url);

    if not data:
        return False

    hits = parse_hits_get_urls(data)

    print "DBLP:hits:", hits
    if len(hits) != 1:
        #XXX, display UI?
        print "DBLP: Not exactly one hit, giving up"
        return False

    bibtex_xml = get_bibtex_xml_from_url(hits[0])
    #print bibtex_xml
    bibtex = bibtex_xml_to_bibtex(bibtex_xml)
    #print bibtex
    doc.parse_bibtex(bibtex)
    return True
コード例 #15
0
ファイル: pubmed.py プロジェクト: maximejay/Gnome-Referencer
def referencer_search_TEST(search_text):
    email = '*****@*****.**'
    tool = 'Referencer'
    database = 'pubmed'

    retmax = 100

    params = {
        'db': database,
        'tool': tool,
        'email': email,
        'term': search_text,
        'usehistory': 'y',
        'retmax': retmax
    }

    # try to resolve the PubMed ID of the DOI
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' + urllib.urlencode(
        params)
    data = referencer.download(_("Searching pubmed"),
                               _("Searching pubmed for '%s'") % search_text,
                               url)

    # parse XML output from PubMed...
    print data
    xmldoc = minidom.parseString(data)
    ids = xmldoc.getElementsByTagName('Id')

    # nothing found, exit
    # FIXME: not really an error
    if len(ids) == 0:
        raise "pubmed.referencer_search: no results"

    webenv = xmldoc.getElementsByTagName('WebEnv')
    if len(webenv) == 0:
        raise "pubmed.referencer_search: no webenv"
    webenv = webenv[0].childNodes[0].data

    query_key = xmldoc.getElementsByTagName('QueryKey')
    if len(query_key) == 0:
        raise "pubmed.referencer_search: no query_key"
    query_key = query_key[0].childNodes[0].data

    params = {
        'db': database,
        'tool': tool,
        'email': email,
        'webenv': webenv,
        'query_key': query_key,
        'retmax': retmax
    }
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?' + urllib.urlencode(
        params)
    data = referencer.download(
        _("Retrieving pubmed summaries"),
        _("Retrieving summaries for '%s'") % search_text, url)

    xmldoc = minidom.parseString(data)

    results = []
    for docsum in xmldoc.getElementsByTagName('DocSum'):
        title = ""
        author = ""
        pmid = ""
        id = docsum.getElementsByTagName("Id")
        if len(id) != 0:
            pmid = id[0].childNodes[0].data
        else:
            raise "pubmed.referencer_search: docsum without id"

        for childnode in docsum.getElementsByTagName("Item"):
            if childnode.getAttribute("Name") == "Title":
                title = childnode.childNodes[0].data
            if childnode.getAttribute("Name") == "Author":
                author = childnode.childNodes[0].data

        results.append({"token": pmid, "title": title, "author": author})

    print results

    return results
コード例 #16
0
ファイル: pubmed.py プロジェクト: mcraveiro/referencer
def referencer_search_TEST (search_text):
	email='*****@*****.**'
   	tool='Referencer'
	database='pubmed'

	retmax = 100

	params = {
		'db':database,
		'tool':tool,
		'email':email,
		'term':search_text,
		'usehistory':'y',
		'retmax':retmax
	}

	# try to resolve the PubMed ID of the DOI
	url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' + urllib.urlencode(params)
	data = referencer.download (_("Searching pubmed"), _("Searching pubmed for '%s'") % search_text , url);

	# parse XML output from PubMed...
	print data
	xmldoc = minidom.parseString(data)
	ids = xmldoc.getElementsByTagName('Id')

	# nothing found, exit
	# FIXME: not really an error
	if len(ids) == 0:
		raise "pubmed.referencer_search: no results"

	webenv = xmldoc.getElementsByTagName('WebEnv')
	if len(webenv) == 0:
		raise "pubmed.referencer_search: no webenv"
	webenv = webenv[0].childNodes[0].data

	query_key = xmldoc.getElementsByTagName('QueryKey')
	if len(query_key) == 0:
		raise "pubmed.referencer_search: no query_key"
	query_key = query_key[0].childNodes[0].data

	params = {
		'db':database,
		'tool':tool,
		'email':email,
		'webenv':webenv,
		'query_key':query_key,
		'retmax':retmax
	}
	url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?' + urllib.urlencode(params)
	data = referencer.download (_("Retrieving pubmed summaries"), _("Retrieving summaries for '%s'") % search_text , url);

	xmldoc = minidom.parseString(data)

	results = []
	for docsum in xmldoc.getElementsByTagName('DocSum'):
		title = ""
		author = ""
		pmid = ""
		id = docsum.getElementsByTagName("Id")
		if len(id) !=0:
			pmid = id[0].childNodes[0].data
		else:
			raise "pubmed.referencer_search: docsum without id"

		for childnode in docsum.getElementsByTagName("Item"):
			if childnode.getAttribute("Name") == "Title":
				title = childnode.childNodes[0].data
			if childnode.getAttribute("Name") == "Author":
				author = childnode.childNodes[0].data

		results.append ({"token":pmid,"title":title,"author":author})

	print results

	return results
コード例 #17
0
ファイル: ads.py プロジェクト: mcraveiro/referencer
def resolve_metadata (doc, method):
	if method != "doi":
		return False

	doi = doc.get_field("doi")
	params = {
		'data_type':"XML",
		'doi':doi
	}

	url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?" + urllib.urlencode (params)
	data = referencer.download (_("Resolving DOI"), _("Fetching metadata from NASA ADS for DOI %s") % doi, url);

	if data.find ("retrieved=\"1\"") == -1:
		print "Couldn't get info from ADS"
		return False

	fields = []
	try:
		xmldoc = minidom.parseString (data)
		fields.append (["title",   get_field(xmldoc, "title")])
		fields.append (["volume",  get_field(xmldoc, "volume")])
		fields.append (["issue",  get_field(xmldoc, "issue")])
		fields.append (["year", get_field(xmldoc, "pubdate").partition(' ')[2]])
		fields.append (["Month", str.lower(get_field(xmldoc, "pubdate").partition(' ')[0])])
		fields.append (["Adsurl", xmldoc.getElementsByTagName('url')[-1].childNodes[0].data.encode("utf-8")])
		fields.append (["Adsbibcode",  get_field(xmldoc, "bibcode")])

		# ADS include full bibliographic information in the journal XML tag,
		# see http://doc.adsabs.harvard.edu/abs_doc/help_pages/taggedformat.html#jnl
		journal = get_field(xmldoc, "journal")
		journalString = re.sub(', [vV]ol(ume|\.).*', '', journal)
		fields.append (["journal", journalString])

		authors = xmldoc.getElementsByTagName('author')
		authorString = ""
		first = True
		for author in authors:
			name = author.childNodes[0].data.encode("utf-8")
			if (first == False):
				authorString += " and "
			print "got author", name
			authorString += name
			first = False

		fields.append (["author", authorString])

		print "appended authors"
		pages = get_field (xmldoc, "page")
		print "getting lastPage"
		lastPage = get_field (xmldoc, "lastpage")
		if (len(lastPage) > 0):
			pages += "-"
			pages += lastPage

		print "got pages " , pages
		fields.append (["pages", pages])
		print "appended pages"
	except:
		print "exception"
		return False

	for field in fields:
		if len(field[1]) > 0:
			doc.set_field(field[0], field[1]) 

	return True
コード例 #18
0
ファイル: ads.py プロジェクト: maximejay/Gnome-Referencer
def resolve_metadata(doc, method):
    if method != "doi":
        return False

    doi = doc.get_field("doi")
    params = {'data_type': "XML", 'doi': doi}

    url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?" + urllib.urlencode(
        params)
    data = referencer.download(
        _("Resolving DOI"),
        _("Fetching metadata from NASA ADS for DOI %s") % doi, url)

    if data.find("retrieved=\"1\"") == -1:
        print "Couldn't get info from ADS"
        return False

    fields = []
    try:
        xmldoc = minidom.parseString(data)
        fields.append(["journal", get_field(xmldoc, "journal")])
        fields.append(["title", get_field(xmldoc, "title")])
        fields.append(["volume", get_field(xmldoc, "volume")])
        fields.append(["issue", get_field(xmldoc, "issue")])
        fields.append(["year", get_field(xmldoc, "pubdate").partition(' ')[2]])
        fields.append([
            "Month",
            str.lower(get_field(xmldoc, "pubdate").partition(' ')[0])
        ])
        fields.append([
            "Adsurl",
            xmldoc.getElementsByTagName('url')[-1].childNodes[0].data.encode(
                "utf-8")
        ])
        fields.append(["Adsbibcode", get_field(xmldoc, "bibcode")])

        authors = xmldoc.getElementsByTagName('author')
        authorString = ""
        first = True
        for author in authors:
            name = author.childNodes[0].data.encode("utf-8")
            if (first == False):
                authorString += " and "
            print "got author", name
            authorString += name
            first = False

        fields.append(["author", authorString])

        print "appended authors"
        pages = get_field(xmldoc, "page")
        print "getting lastPage"
        lastPage = get_field(xmldoc, "lastpage")
        if (len(lastPage) > 0):
            pages += "-"
            pages += lastPage

        print "got pages ", pages
        fields.append(["pages", pages])
        print "appended pages"
    except:
        print "exception"
        return False

    for field in fields:
        if len(field[1]) > 0:
            doc.set_field(field[0], field[1])

    return True
コード例 #19
0
ファイル: dblp.py プロジェクト: mcraveiro/referencer
def get_bibtex_xml_from_url(url):
    """Download the XML export of a DBLP record page URL."""
    xml_url = "%s.xml" % url
    return referencer.download(_("Searching DBLP"),
                               _("Fetching metadata from DBLP for url '%s'") % xml_url,
                               xml_url)
コード例 #20
0
def get_bibtex_xml_from_url(url):
    """Fetch the ".xml" export of a DBLP record URL and return its body."""
    target = url + ".xml"
    response = referencer.download(
        _("Searching DBLP"),
        _("Fetching metadata from DBLP for url '%s'") % target, target)
    return response