def addAllDataToSolrFromUrl(sourceUrl, patternUrl, solrUrl):
    """
    Crawl events from a URL and add them to a Solr index.

    The events are obtained through ``getCoordinatesForAllData(sourceUrl,
    patternUrl)`` and then handed, together with ``solrUrl``, to
    ``EventsPortal.addDataToSolr``.

    :param sourceUrl: forwarded to ``getCoordinatesForAllData``.
    :param patternUrl: forwarded to ``getCoordinatesForAllData``.
    :param solrUrl: forwarded to ``EventsPortal.addDataToSolr``.
    """
    events = getCoordinatesForAllData(sourceUrl, patternUrl)
    EventsPortal.addDataToSolr(events, solrUrl)
def addAllDataToSolrFromUrl(sourceUrl, patternUrl, solrUrl):
    """
    Add data to a Solr index, crawling events from a URL.

    Step 1 collects the event documents via ``getCoordinatesForAllData``;
    step 2 passes them to ``EventsPortal.addDataToSolr`` along with the
    target ``solrUrl``. Nothing is returned.

    :param sourceUrl: first argument of ``getCoordinatesForAllData``.
    :param patternUrl: second argument of ``getCoordinatesForAllData``.
    :param solrUrl: second argument of ``EventsPortal.addDataToSolr``.
    """
    crawled = getCoordinatesForAllData(sourceUrl, patternUrl)
    EventsPortal.addDataToSolr(crawled, solrUrl)
def getCoordinatesForAllData(sourceUrl, patternUrl):
    """
    Fetch all events and geocode the ones that have no latitude.

    Events are loaded with ``EventsPortal.getAllEventsData(sourceUrl,
    patternUrl)``. For every event whose ``'latitude'`` value is ``None``,
    the ``'locationCity'`` value is geocoded with Nominatim and the result
    is stored under the keys ``'latitudetest'`` and ``'longitudetest'``.
    Events that already have a latitude are left untouched, as are events
    with an empty city or a city the geocoder cannot resolve.

    :param sourceUrl: forwarded to ``EventsPortal.getAllEventsData``.
    :param patternUrl: forwarded to ``EventsPortal.getAllEventsData``.
    :return: the collection returned by ``getAllEventsData``; matching
        events are updated in place.
    """
    data = EventsPortal.getAllEventsData(sourceUrl, patternUrl)

    # Built on first use and then shared by all lookups, instead of
    # constructing a new geocoder for every event.
    geolocator = None

    for doc in data:
        if doc['latitude'] is not None:
            continue

        city = doc['locationCity']
        if not city:
            # Nothing to look up for this event.
            continue

        if geolocator is None:
            geolocator = Nominatim()

        location = geolocator.geocode(city)
        if location is None:
            # geocode() yields None when there is no match; skip this event
            # rather than failing the whole run on `location.latitude`.
            continue

        doc['latitudetest'] = location.latitude
        doc['longitudetest'] = location.longitude

    return data
def getCoordinatesForAllData(sourceUrl, patternUrl):
    """
    Load every event and fill in coordinates where the latitude is missing.

    The events come from ``EventsPortal.getAllEventsData(sourceUrl,
    patternUrl)``. When an event's ``'latitude'`` is ``None``, its
    ``'locationCity'`` is sent to the Nominatim geocoder and the returned
    latitude/longitude are written to ``'latitudetest'`` and
    ``'longitudetest'``. Events with a latitude, with an empty city, or
    whose city yields no geocoding result are not modified.

    :param sourceUrl: forwarded to ``EventsPortal.getAllEventsData``.
    :param patternUrl: forwarded to ``EventsPortal.getAllEventsData``.
    :return: the object returned by ``getAllEventsData``, with the
        geocoded events mutated in place.
    """
    data = EventsPortal.getAllEventsData(sourceUrl, patternUrl)

    # One geocoder for the whole run, created only if some event needs it.
    geolocator = None

    for doc in data:
        if doc['latitude'] is not None:
            continue

        city = doc['locationCity']
        if not city:
            # No city to geocode.
            continue

        if geolocator is None:
            geolocator = Nominatim()

        location = geolocator.geocode(city)
        if location is None:
            # No match from the geocoder: leave the event without
            # coordinates instead of raising AttributeError below.
            continue

        doc['latitudetest'] = location.latitude
        doc['longitudetest'] = location.longitude

    return data
__author__ = 'chuqiao'

import EventsPortal

# Arguments of the deleteDataInSolrFromUrl call made below.
SOURCE_URL = "http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All"
SOLR_URL = "139.162.217.53:8983/solr/eventsportal"

# DELETE ALL DATA
# EventsPortal.deleteDataInSolr()

# ADD DATA FROM 2 SOURCES
# EventsPortal.addDataToSolrFromUrl("http://www.elixir-europe.org:8080/events", "http://www.elixir-europe.org:8080/events")
# EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","localhost:8983/solr/event_portal")

# DELETE DATA FROM 1 SOURCE
EventsPortal.deleteDataInSolrFromUrl(SOURCE_URL, SOLR_URL)

# EventsPortal.deleteDataInSolrByQuery('source:("http://www.elixir-europe.org:8080/events")')
# EventsPortal.deleteDataInSolrByQuery('source:("http://localhost/ep/events" AND "state=published" AND "field_type_tid=All")')
# add the handlers to the logger logger.addHandler(ch) logger.addHandler(fh) # EventsPortal.addDataToSolrFromUrl("http://www.elixir-europe.org:8080/events", "http://www.elixir-europe.org:8080/events"); logger() # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/upcoming?state=published&field_type_tid=All", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events","http://localhost:8983/solr/event_portal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","139.162.217.53:8983/solr/eventsportal/") if __name__ == '__main__': logger.info('start at %s' % datetime.now()) EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events", "139.162.217.53:8983/solr/eventsportal/") logger.info('finish at %s' % datetime.now())
# create formatter and add it to the handlers formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) ch.setFormatter(formatter) # add the handlers to the logger logger.addHandler(ch) logger.addHandler(fh) # EventsPortal.addDataToSolrFromUrl("http://www.elixir-europe.org:8080/events", "http://www.elixir-europe.org:8080/events"); logger() # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/upcoming?state=published&field_type_tid=All", "http://bioevents-portal.org/events","http://139.162.217.53:8983/solr/eventsportal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events","http://localhost:8983/solr/event_portal"); # EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","139.162.217.53:8983/solr/eventsportal/") if __name__ == '__main__': logger.info('start at %s' % datetime.now()) EventsPortal.addDataToSolrFromUrl( "http://bioevents-portal.org/eventsfull", "http://bioevents-portal.org/events", "139.162.217.53:8983/solr/eventsportal/") logger.info('finish at %s' % datetime.now())
__author__ = 'chuqiao'

import EventsPortal

# DELETE ALL DATA
# EventsPortal.deleteDataInSolr()

# ADD DATA FROM 2 SOURCES
# EventsPortal.addDataToSolrFromUrl("http://www.elixir-europe.org:8080/events", "http://www.elixir-europe.org:8080/events")
# EventsPortal.addDataToSolrFromUrl("http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All", "http://bioevents-portal.org/events","localhost:8983/solr/event_portal")

# DELETE DATA FROM 1 SOURCE
# The two positional arguments are kept together in one tuple and unpacked
# into the call, in the same order as before.
delete_args = (
    "http://bioevents-portal.org/eventsfull/test?state=published&field_type_tid=All",
    "139.162.217.53:8983/solr/eventsportal",
)
EventsPortal.deleteDataInSolrFromUrl(*delete_args)

# EventsPortal.deleteDataInSolrByQuery('source:("http://www.elixir-europe.org:8080/events")')
# EventsPortal.deleteDataInSolrByQuery('source:("http://localhost/ep/events" AND "state=published" AND "field_type_tid=All")')
__author__ = 'chuqiao'

import EventsPortal

# Argument of the deleteDataInSolr call made below.
SOLR_URL = "http://139.162.217.53:8983/solr/eventsportal"

# Runs as soon as this file is executed or imported: there is no
# `if __name__ == '__main__'` guard around the call.
EventsPortal.deleteDataInSolr(SOLR_URL)