예제 #1
0
    def fetch_opensky_pids(self, start_date='2014-01-01T00:00:00Z'):
        """
        Return the pids of records that have a DOI and a keyDate on or after
        start_date.

        The Solr query sent is

            mods_identifier_doi_mt:* AND keyDate:[<start_date> TO *]

        and it is run through feeds.SolrSearchFeed against the endpoint built
        from the "fedora" config section (SERVER + SOLR_PATH).

        With the default start_date the query was noted as fetching 5184 pids
        (verified when this was written).

        :param start_date: lower bound of the keyDate range, as a Solr date
            literal. Defaults to the previously hard-coded
            '2014-01-01T00:00:00Z', so existing callers are unaffected.
        :return: feed.pids as produced by the SolrSearchFeed
        """
        query = 'mods_identifier_doi_mt:* AND keyDate:[{} TO *]'.format(start_date)

        feed = feeds.SolrSearchFeed(
            params={'q': query},
            baseUrl=CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH"),
        )

        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print('{} in feed'.format(len(feed.pids)))
        return feed.pids
예제 #2
0
def getFeed():
	"""
	Build and return a SolrSearchFeed for the query 'mods_note_funding_s:*'.

	The feed is run against the Solr endpoint built from the "fedora" config
	section (SERVER + SOLR_PATH). Before returning, the feed's numFound
	attribute is overwritten with the number of pids actually held, a summary
	line is printed, and feed.pids is sorted in place.

	:return: the SolrSearchFeed instance, with sorted pids
	"""
	feed = feeds.SolrSearchFeed(
		params={'q': 'mods_note_funding_s:*'},
		baseUrl=CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH"),
	)

	# numFound is set before size() is called, as in the original ordering.
	pid_count = len(feed.pids)
	feed.numFound = pid_count

	# Parenthesised single-argument print behaves the same on Python 2 and 3.
	print('feed: %d (%d)' % (pid_count, feed.size()))

	feed.pids.sort()
	return feed
예제 #3
0
def update_old_funder_records():
    """
    The Kuali backfill updated the records that also had Kuali IDs, but there are
    452 that still have old-school funder info (mods_name_corporate_funder_s:*).

    <name type="corporate">
        <namePart>National Science Foundation (NSF)</namePart>
        <role>
            <roleTerm type="text" authority="marcrelator">funder</roleTerm>
        </role>
    </name>
    <note type="funding">National Science Foundation (NSF): 1852977</note>

    Now we want to remove the corporate funder info, and try to normalize the
    award_ids with Kuali-verified versions. If ids can't be normalized thus, then
    add a 'displayLabel' = 'Legacy funding data' attribute

    """
    dowrites = 0

    args = {
        'params': {
            'q': 'mods_name_corporate_funder_s:*'
        },
        'baseUrl':
        CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH"),
    }
    feed = feeds.SolrSearchFeed(**args)
    print 'feed has {}'.format(len(feed.pids))
    NotesMODS.dowrites = dowrites

    for pid in feed.pids:
        print pid
        mods_xml = get_datastream(pid, 'MODS')
        mods = NotesMODS(mods_xml, pid)

        # print 'BEFORE Backfill'
        # mods.show_notes()
        mods.do_back_fill([])