def Main():
    '''Wrapper.'''
    try:
        #
        # Collecting data from UNHCR.
        #
        print '%s Collecting data from UNHCR.' % item('bullet')
        data = Fetch.Fetch()

        #
        # Patching data.
        # Epoch time doesn't seem to be 1970.
        #
        print '%s Patching data.' % item('bullet')
        # pdata = Patch.Epoch(data)
        pdata = Patch.Date(data)

        #
        # Storing data in database.
        #
        print '%s Storing records in database.' % item('bullet')
        CleanTable('monthly_arrivals_by_country')
        StoreRecords(pdata, 'monthly_arrivals_by_country')

        print '%s Collected data from UNHCR successfully.' % item('success')
        scraperwiki.status('ok')

    except Exception as e:
        print '%s UNHCR Collector failed.' % item('error')
        scraperwiki.status('error', 'Collection failed.')
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure

    """
    try:
        #
        # Setting up configuration
        #
        configuration = Configuration(**kwargs)

        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % configuration.get_hdx_site())

        projectmainfn(configuration)

    except Exception as e:
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure

    """
    try:
        #
        # Setting up configuration
        #
        configuration = Configuration(**kwargs)

        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % configuration.get_hdx_site_url())

        projectmainfn(configuration)

    except Exception as e:
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
def update_status(table_name="swdata", date_column="date"):
    """
    Set the status endpoint on ScraperWiki to the latest entry
    e.g. 'Latest entry: 2013-10-01'
    """
    status_text = 'Latest entry: {}'.format(
        _get_most_recent_record(table_name, date_column))
    L.info(status_text)
    scraperwiki.status('ok', status_text)
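The `_get_most_recent_record` helper used above is not shown; a minimal sketch of what it might look like, assuming it just selects the maximum of the date column via `scraperwiki.sql.select` (the helper name and query are assumptions, not the original implementation):

import scraperwiki

def _get_most_recent_record(table_name, date_column):
    # Hypothetical helper: return the newest value in date_column.
    # scraperwiki.sql.select prepends "select " to the query it is given.
    result = scraperwiki.sql.select(
        "max({column}) AS most_recent FROM {table}".format(
            column=date_column, table=table_name))
    return result[0]['most_recent']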
def set_status_and_exit(status, typ, message, extra={}):
    global current_status
    extra['status'] = status
    print json.dumps(extra)
    scraperwiki.status(typ, message)
    current_status = status
    save_status()
    sys.exit()
def wrapper(*args, **kwargs):
    try:
        res = func(*args, **kwargs)
    except Exception as e:
        self.logger.exception(str(e))
        scraperwiki.status('error', 'Error collecting data')
        with open(self.logfile, 'rb') as f:
            self.email(text=f.read())
    else:
        scraperwiki.status('ok')
        return res
def wrapper(*args, **kwargs):
    try:
        res = func(*args, **kwargs)
    except Exception as e:
        self.logger.exception(str(e))
        scraperwiki.status("error", "Error collecting data")
        with open(self.logfile, "rb") as f:
            self.email(text=f.read())
    else:
        scraperwiki.status("ok")
        return res
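The two wrappers above look like the inner function of a decorator defined on a collector class; a minimal sketch of how the enclosing method might be laid out (the class name, `catch_errors`, `logger`, `logfile`, and `email` are assumptions, not taken from the original code):

import functools
import scraperwiki

class Collector(object):
    # Hypothetical class: logger, logfile, and email are assumed attributes.

    def catch_errors(self, func):
        """Report success or failure of the wrapped function to ScraperWiki."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except Exception as e:
                self.logger.exception(str(e))
                scraperwiki.status('error', 'Error collecting data')
                with open(self.logfile, 'rb') as f:
                    self.email(text=f.read())
            else:
                scraperwiki.status('ok')
                return res
        return wrapper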
def set_status_and_exit(status, typ, message, extra={}):
    logging.info("Exiting with status {!r}:{!r}".format(status, message))
    extra['status'] = status
    print json.dumps(extra)
    scraperwiki.status(typ, message)
    scraperwiki.sql.save(data={"current_status": status,
                               "id": "global",
                               "when": datetime.datetime.now().isoformat()},
                         table_name='__status',
                         unique_keys=['id'])
    sys.exit()
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError("parse.py takes exactly one argument: the path to an iTunes plist. %s provided." % len(sys.argv[1:]))
        else:
            save(parse(extract(sys.argv[1])))
    except Exception, e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError("Please supply a single argument. An example would be 'kittens'")
        else:
            search_flickr(sys.argv[1])
    except Exception, e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
def main():
    favs = flickr.walk(tags="kittens", extras="geo")
    for photo in favs:
        if photo.get('latitude') != '0':
            print photo.get('title')
            title = photo.get('title')
            print photo.get('latitude')
            latitude = float(photo.get('latitude'))
            print photo.get('longitude')
            longitude = float(photo.get('longitude'))
            print photo.get('id')
            identity = photo.get('id')
            print shorturl.url(photo.get('id'))
            url = shorturl.url(photo.get('id'))
            submit_to_scraperwiki(identity, title, latitude, longitude, url)
    scraperwiki.status('ok', "OK")
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError(
                "parse.py takes exactly one argument: the path to an iTunes plist. %s provided." % len(sys.argv[1:]))
        else:
            save(parse(extract(sys.argv[1])))
    except Exception, e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
def runAllTests():
    try:
        checkID(resource_id, api_key)
        checkFileName(resource_id, api_key)
        checkNFiles(resource_id, api_key)
        checkDataStore(resource_id, api_key)
        checkRecords(api_key)
        checkValue(api_key)

        # if everything ok
        print "All tests passed."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Tests failed')
        os.system("mail -s 'Ebola toplines: tests failed.' [email protected]")
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError(
                "Please supply a single argument. An example would be 'kittens'"
            )
        else:
            search_plurk(sys.argv[1])
    except Exception, e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
def sw_excepthook(type, value, tb):
    """Log uncaught exceptions to scraperwiki.sqlite file."""
    global _successful_exit
    _successful_exit = False
    try:
        first_frame_tuple = inspect.getouterframes(tb.tb_frame)[-1]
        (_frame, filename, _lineno, _where, _code, _) = first_frame_tuple
        type_name = type.__module__ + '.' + type.__name__
        message = repr(value)
        write_runlog(filename, ''.join(traceback.format_tb(tb)),
                     type_name, message, False, run_id)
        scraperwiki.status('error')
    finally:
        inner_excepthook(type, value, tb)
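For context, a hook like this is typically installed by assigning it to `sys.excepthook`; a minimal sketch of the assumed wiring, presuming the surrounding module keeps the previous hook as `inner_excepthook` so the handler can delegate to it:

import sys

# Assumed wiring, not shown above: remember the previous hook so
# sw_excepthook can delegate to it, then install the new handler.
inner_excepthook = sys.excepthook
sys.excepthook = sw_excepthook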
def Main(config_path, **kwargs):
    '''Wrapper.'''
    clean_run = kwargs.get('clean_run', True)
    verbose = kwargs.get('verbose', True)
    debug = kwargs.get('debug', True)

    try:
        for endpoint_name in ['FCS', 'CSI', 'Income']:
            #
            # Clean records from database.
            #
            if clean_run:
                db.CleanTable(table_name=endpoint_name, verbose=verbose)

            #
            # Query WFP for data.
            #
            data = BuildQueue(endpoint_name, config_path, verbose=verbose)
            MakeRequests(data, endpoint_name, config_path, **kwargs)

    except Exception as e:
        print "%s Failed to collect data from WFP." % item('prompt_error')
        scraperwiki.status('error', 'Error collecting data.')
        os.system("mail -s 'WFP APIs: Collector failed.' [email protected]")
        if debug:
            raise
        if verbose:
            print e

    else:
        #
        # Success!
        #
        print "%s All data was collected successfully." % item('prompt_success')
        print "SW Status: Everything seems to be just fine."
        scraperwiki.status('ok')
def do_work(limit):
    # TODO: factor into master dict of colnames/css selectors
    scraperwiki.sql.execute("""CREATE TABLE IF NOT EXISTS people
        (id, source_id, scraped, name, headline, distance, num_connections,
         location_name, location_country_code, industry, company_name,
         company_type, company_size, company_industry, company_ticker,
         public_profile_url, picture_url)""")

    access_token = json.load(open('access_token.json'))['access_token']

    worklist = scraperwiki.sql.select(
        """source.name AS name, source.id AS source_id
           FROM source LEFT JOIN people ON source.id = people.source_id
           ORDER BY scraped LIMIT ?""", [limit])

    for person in worklist:
        # print "working on", person
        params = {
            'keywords': person['name'],
            'oauth2_access_token': access_token
        }
        fields = ("id,first-name,last-name,headline," +
                  "distance,num-connections,num-connections-capped," +
                  "location:(name,country:(code)),industry," +
                  "positions:(company:(name,type,size,industry,ticker))," +
                  "public-profile-url," +
                  "picture-url")
        baseurl = "https://api.linkedin.com/v1/people-search:(people:(%s))" % fields
        r = requests.get(baseurl, params=params)
        r.raise_for_status()
        save_first_person(source_id=person['source_id'], xml=r.content)

        progress = scraperwiki.sql.select(
            """count(*) as source, (select count(*) from people) as people
               from source""")
        progress = progress[0]
        message = "Read %(people)d/%(source)d" % progress
        scraperwiki.status('ok', message)
def main(development=False):
    '''Wrapper.'''
    try:
        #
        # Either collect data or use
        # previously collected data from
        # database.
        #
        if development is False:
            data = collect()
            pdata = patch(data)
        else:
            cursor = scraperwiki.sqlite.execute('SELECT * FROM opennepal_content')
            pdata = []
            for record in cursor['data']:
                pdata.append(dict(zip(cursor['keys'], record)))

        #
        # Create static JSON files.
        #
        export_json(data=pdata)
        scraperwiki.status('ok')

    #
    # Send notification if scraper fails.
    #
    except Exception as e:
        print '%s OpenNepal Scraper failed.' % item('error')
        print e
        scraperwiki.status('error', 'Collection failed.')
        os.system("mail -s 'OpenNepal: Scraper failed.' [email protected]")
    'User-agent': "Mozilla/5.0 "
                  "(Macintosh; Intel Mac OS X 10_11_6) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/55.0.2883.95 Safari/537.36",
    'Connection': 'keep-alive'
})

for anno, n_norme in norme_anno.items():
    for k in range(1, n_norme + 1):
        norma_url = "/uri-res/N2Ls?urn:nir:{0};{1}!vig=".format(anno, k)
        print(norma_url)  # urn and partial url of the provision

        process_permalinks(_get_permalinks(norma_url, session=session),
                           session=session)

scraperwiki.status('ok')

# explore and solve referenced links (first passage)
referenced_links = set([
    res['Reference']
    for res in scraperwiki.sql.select("Reference from Nodes where Scraped = 0")
])
for link in referenced_links:
    process_permalinks(_get_permalinks(_get_relative_url(link), session=session),
                       session=session)

scraperwiki.status('ok')
        create.CreateDatasets(dataset_dict=dataset_dict,
                              hdx_site=p['hdx_site'],
                              apikey=p['hdx_key'],
                              verbose=p['verbose'],
                              update_all_datasets=p['update_all_datasets'])
        create.CreateResources(resource_dict=resource_dict,
                               hdx_site=p['hdx_site'],
                               apikey=p['hdx_key'],
                               verbose=p['verbose'],
                               update_all_datasets=p['update_all_datasets'])
        # create.CreateGalleryItems(gallery_dict=gallery_dict, hdx_site=p['hdx_site'], apikey=p['hdx_key'], verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])

    except Exception as e:
        print e
        return False


if __name__ == '__main__':
    if Main() != False:
        print '%s OpenNepal scraper registered datasets successfully.\n' % I('success')
        scraperwiki.status('ok')
    else:
        scraperwiki.status('error', 'Failed to register resources.')
        os.system("mail -s 'OpenNepal scraper failed to register datasets' [email protected]")
import os

import scraperwiki

import app.utilities.load as Load
from app.utilities.item import item
from app.collect.collect import collectData
from app.utilities.store_data import storeData


def main():
    '''
    Application wrapper.
    '''
    dir_name = os.path.dirname(os.path.realpath(__file__))
    file = os.path.join(dir_name, 'config', 'config.json')
    config = Load.loadJSONFile(file)
    for endpoint in config['endpoints']:
        data = collectData(endpoint['url'])
        storeData(data, endpoint['name'])


if __name__ == '__main__':
    try:
        main()
        print('{success} Successfully collected OCHA CERF data.'.format(success=item('success')))
        scraperwiki.status('ok')

    except Exception as e:
        print('{failure} Failed to collect OCHA CERF data.'.format(failure=item('error')))
        scraperwiki.status('error', 'Failed to collect data.')
def main():
    """
    Program wrapper.
    """
    tables = ["pblStatsSum", "pblStatsSum4Maps"]
    for t in tables:
        m = mVAM(table=t)
        output = []
        records = m.query()
        for record in records:
            output.append(parse(record))

        store_csv(data=output, path="%s.csv" % t)
        store_sqlite(data=output, table=t)


if __name__ == "__main__":
    try:
        main()
        print("%s Successfully collected mVAM data." % item("success"))
        scraperwiki.status("ok")

    except Exception as e:
        print("%s Failed to collect mVAM data." % item("error"))
        print(e)
        scraperwiki.status("error", "Failed to collect data.")
def update_status():
    status_text = 'Last changed: {}'.format(
        get_most_recent_record('changes', 'datetime'))
    scraperwiki.status('ok', status_text)
import scraperwiki

scraperwiki.status(type='error')
def test_does_nothing_if_called_outside_box(self):
    scraperwiki.status('ok')
        return

    # proceed if the hash is different, i.e. update
    print "New data from the WHO. Send alert + grab data."
    pushbullet.sendAlert(pushbullet_key, PUSBULLET_PAYLOAD)
    os.system('bash bin/run_scraper.sh')  # run the scraper


def Main(p):
    '''Wrapper.'''
    downloadFile(p)
    checkForAlert(p)


if __name__ == '__main__':
    #
    # Error handler for ScraperWiki messages.
    #
    try:
        Main(PATH)
        print "Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Check for new files failed.')
        os.system("mail -s 'WHO Alert failed: unknown error..' [email protected]")
import os

import scraperwiki

from utilities.hdx_format import item
from ors_collect import patch as Patch
from ors_collect import collect as Collect


def Main(patch=True):
    '''Wrapper for main program.'''
    #
    # Collect data.
    #
    Collect.Main()

    #
    # Patch.
    #
    if patch:
        Patch.Main()


if __name__ == '__main__':
    try:
        Main()
        print "SW Status: Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Error collecting data.')
        os.system("echo https://ds-ec2.scraperwiki.com/3zarzzv/0zftw6fzkjxommp/http/log.txt | mail -s 'ORS APIs: Failed collecting data.' [email protected]")
        offset += chunksize

    print('Done: %s' % offset)


def Main():
    '''Wrapper.'''
    ckan = ckanapi.RemoteCKAN(REMOTE_CKAN, apikey=apikey)
    resource = resources[0]
    upload_data_to_datastore(resource['resource_id'], resource)
    downloadResource(PATH)
    updateDatastore(PATH)


if __name__ == '__main__':
    #
    # ScraperWiki error handler.
    #
    try:
        Main()
        print "SW Status: Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Creating datastore failed')
        os.system("mail -s 'Ebola Case data: creating datastore failed.' [email protected]")
        # Delete resources before running:
        if p['delete_resources']:
            delete.DeleteResources(dataset_dict=dataset_dict,
                                   hdx_site=p['hdx_site'],
                                   apikey=p['hdx_key'],
                                   verbose=p['verbose'])

        if p['update_all_datasets']:
            print('--------------------------------------------------')
            print(color(u" ATTENTION:", "blue", attrs=['bold']) + ' Updating ALL datasets.')
            print('--------------------------------------------------')

        #
        # Create datasets, resources, and gallery items.
        #
        create.CreateDatasets(dataset_dict=dataset_dict,
                              hdx_site=p['hdx_site'],
                              apikey=os.getenv('HDX_KEY'),
                              verbose=p['verbose'],
                              update_all_datasets=p['update_all_datasets'])
        create.CreateResources(resource_dict=resource_dict,
                               hdx_site=p['hdx_site'],
                               apikey=os.getenv('HDX_KEY'),
                               verbose=p['verbose'],
                               update_all_datasets=p['update_all_datasets'])
        # create.CreateGalleryItems(gallery_dict=gallery_dict, hdx_site=p['hdx_site'], apikey=os.getenv('HDX_KEY'), verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])

    except Exception as e:
        print(e)
        return False


if __name__ == '__main__':
    if main() != False:
        print('%s IFPRI scraper finished successfully.\n' % I('success'))
        scraperwiki.status('ok')
    else:
        scraperwiki.status('error', 'Failed to register resources.')
        os.system("mail -s 'IFPRI scraper collector failed' [email protected]")
def runEverything(p):
    # fetch the resources list
    resources = getResources(p)
    print '-------------------------------------'

    # iterating through the provided list of resources
    for i in range(0, len(resources)):
        resource = resources[i]  # getting the right resource
        resource_id = resource['resource_id']  # getting the resource_id
        print "Reading resource id: " + resource_id
        downloadResource(p, resource_id, API_KEY)
        updateDatastore(p, resource_id, resource, API_KEY)

    print '-------------------------------------'
    print 'Done.'
    print '-------------------------------------'


# Error handler for running the entire script
try:
    runEverything(FILE_PATH)
    # if everything ok
    print "ScraperWiki Status: Everything seems to be just fine."
    scraperwiki.status('ok')
except Exception as e:
    print e
    scraperwiki.status('error', 'Creating datastore failed')
    os.system("mail -s 'Fiji Topline: creating datastore failed.' [email protected]")
def Main(patch=True):
    '''Wrapper for main program.'''
    #
    # Collect data.
    #
    Collect.Main()

    #
    # Patch.
    #
    if patch:
        Patch.Main()


if __name__ == '__main__':
    try:
        Main()
        print "SW Status: Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Error collecting data.')
        os.system("echo https://ds-ec2.scraperwiki.com/3zarzzv/0zftw6fzkjxommp/http/log.txt | mail -s 'ORS APIs: Failed collecting data.' [email protected]")
            raise InvalidArgumentError("Please supply a single argument. An example would be 'kittens'")
        else:
            search_flickr(sys.argv[1])
    except Exception, e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
    else:
        scraperwiki.status('ok')
        print json.dumps({
            'success': {
                'type': 'ok',
                'message': "Saved Flickr photo information"
            }
        })


def search_flickr(searchvalue):
    favs = flickr.walk(tags=searchvalue, extras="geo")
    rows = []
    for i, photo in enumerate(favs):
        if photo.get('latitude') != '0':
            row = OrderedDict()
            row['id'] = photo.get('id')
            row['title'] = photo.get('title')
def update_status():
    status_text = 'Latest entry: {}'.format(
        get_most_recent_record('events', 'date'))
    print(status_text)
    scraperwiki.status('ok', status_text)