Example #1
def Main():
    '''Wrapper.'''

    try:
        #
        # Collecting data from UNHCR.
        #
        print '%s Collecting data from UNHCR.' % item('bullet')
        data = Fetch.Fetch()

        #
        # Patching data.
        # Epoch time doesn't seem to be 1970.
        #
        print '%s Patching data.' % item('bullet')
        # pdata = Patch.Epoch(data)
        pdata = Patch.Date(data)

        #
        # Storing data in database.
        #
        print '%s Storing records in database.' % item('bullet')
        CleanTable('monthly_arrivals_by_country')
        StoreRecords(pdata, 'monthly_arrivals_by_country')

        print '%s Collected data from UNHCR successfully.' % item('success')
        scraperwiki.status('ok')

    except Exception as e:
        print '%s UNHCR Collector failed.' % item('error')
        scraperwiki.status('error', 'Collection failed.')
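
The CleanTable and StoreRecords helpers used above are not defined on this page. A minimal sketch of what they might do, using only the scraperwiki.sql calls that appear in later examples (the empty unique-key list is an assumption), could be:

def CleanTable(table_name):
    # Hypothetical helper: drop previously stored rows before re-collecting.
    scraperwiki.sql.execute('DELETE FROM %s' % table_name)

def StoreRecords(records, table_name):
    # Hypothetical helper: bulk-save the patched records (a list of dicts).
    scraperwiki.sql.save(unique_keys=[], data=records, table_name=table_name)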
Example #2
def Main():
  '''Wrapper.'''

  try:
    #
    # Collecting data from UNHCR.
    #
    print '%s Collecting data from UNHCR.' % item('bullet')
    data = Fetch.Fetch()

    #
    # Patching data.
    # Epoch time doesn't seem to be 1970.
    #
    print '%s Patching data.' % item('bullet')
    # pdata = Patch.Epoch(data)
    pdata = Patch.Date(data)

    #
    # Storing data in database.
    #
    print '%s Storing records in database.' % item('bullet')
    CleanTable('monthly_arrivals_by_country')
    StoreRecords(pdata, 'monthly_arrivals_by_country')

    print '%s Collected data from UNHCR successfully.' % item('success')
    scraperwiki.status('ok')

  except Exception as e:
    print '%s UNHCR Collector failed.' % item('error')
    scraperwiki.status('error', 'Collection failed.')
Example #3
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure
    """

    try:
        #
        # Setting up configuration
        #
        configuration = Configuration(**kwargs)

        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % configuration.get_hdx_site())

        projectmainfn(configuration)

    except Exception as e:
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
Example #4
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure
    """

    try:
        #
        # Setting up configuration
        #
        configuration = Configuration(**kwargs)

        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % configuration.get_hdx_site_url())

        projectmainfn(configuration)

    except Exception as e:
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
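
A minimal usage sketch for the facade above: my_main is a hypothetical project entry point, hdx_site is just one example of a keyword argument passed through to Configuration, and logger is assumed to be the module logger used in the facade.

def my_main(configuration):
    # Hypothetical project main function; receives the Configuration built by facade().
    logger.info('Collecting against %s' % configuration.get_hdx_site_url())

if facade(my_main, hdx_site='test'):
    logger.info('Project run succeeded.')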
Example #5
def update_status(table_name="swdata", date_column="date"):
    """
    Set the status endpoint on ScraperWiki to the latest entry e.g.
    'Latest entry: 2013-10-01'
    """
    status_text = 'Latest entry: {}'.format(
        _get_most_recent_record(table_name, date_column))
    L.info(status_text)

    scraperwiki.status('ok', status_text)
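
The _get_most_recent_record helper is not shown here. A plausible sketch (the query shape is an assumption; note that scraperwiki.sql.select prepends SELECT to the string it is given, as later examples rely on):

def _get_most_recent_record(table_name, column):
    # Assumed implementation: return the newest value in `column` of `table_name`.
    rows = scraperwiki.sql.select(
        "MAX({column}) AS latest FROM {table}".format(
            column=column, table=table_name))
    return rows[0]['latest']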
Example #6
def set_status_and_exit(status, typ, message, extra = {}):
    global current_status

    extra['status'] = status
    print json.dumps(extra)

    scraperwiki.status(typ, message)

    current_status = status
    save_status()

    sys.exit()
Example #7
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except Exception as e:
                self.logger.exception(str(e))
                scraperwiki.status('error', 'Error collecting data')

                with open(self.logfile, 'rb') as f:
                    self.email(text=f.read())
            else:
                scraperwiki.status('ok')
                return res
Example #8
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except Exception as e:
                self.logger.exception(str(e))
                scraperwiki.status("error", "Error collecting data")

                with open(self.logfile, "rb") as f:
                    self.email(text=f.read())
            else:
                scraperwiki.status("ok")
                return res
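
Examples #7 and #8 are the inner function of a decorator defined on a collector class. A self-contained sketch of the same pattern, with hypothetical names and re-raising instead of emailing the log file, looks like this:

import functools

import scraperwiki


def report_status(func):
    '''Report 'ok' or 'error' to ScraperWiki around a collector function.'''
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            res = func(*args, **kwargs)
        except Exception:
            scraperwiki.status('error', 'Error collecting data')
            raise
        else:
            scraperwiki.status('ok')
            return res
    return wrapper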
Example #9
def set_status_and_exit(status, typ, message, extra={}):
    logging.info("Exiting with status {!r}:{!r}".format(status, message))
    extra['status'] = status
    print json.dumps(extra)
    scraperwiki.status(typ, message)
    scraperwiki.sql.save(data={"current_status": status,
                               "id": "global",
                               "when": datetime.datetime.now().isoformat()
                               },
                         table_name='__status',
                         unique_keys=['id'])

    sys.exit()
Example #10
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError("parse.py takes exactly one argument: the path to an iTunes plist. %s provided." % len(sys.argv[1:]))
        else:
            save(parse(extract(sys.argv[1])))

    except Exception as e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
Example #11
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError("Please supply a single argument. An example would be 'kittens'")
        else:
            search_flickr(sys.argv[1])

    except Exception as e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
Example #12
def main():
    favs = flickr.walk(tags="kittens", extras="geo")
    for photo in favs:
        if photo.get('latitude') != '0':
            print photo.get('title')
            title = photo.get('title')
            print photo.get('latitude')
            latitude = float(photo.get('latitude'))
            print photo.get('longitude')
            longitude = float(photo.get('longitude'))
            print photo.get('id')
            identity = photo.get('id')
            print shorturl.url(photo.get('id'))
            url = shorturl.url(photo.get('id'))
            submit_to_scraperwiki(identity, title, latitude, longitude, url)

    scraperwiki.status('ok', "OK")
Example #13
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError(
                "parse.py takes exactly one argument: the path to an iTunes plist. %s provided."
                % len(sys.argv[1:]))
        else:
            save(parse(extract(sys.argv[1])))

    except Exception as e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
Example #14
def runAllTests():

	try:
		checkID(resource_id, api_key)
		checkFileName(resource_id, api_key)
		checkNFiles(resource_id, api_key)
		checkDataStore(resource_id, api_key)
		checkRecords(api_key)
		checkValue(api_key)

		# if everything ok
		print "All tests passed."
		scraperwiki.status('ok')

	except Exception as e:
		print e
		scraperwiki.status('error', 'Tests failed')
		os.system("mail -s 'Ebola toplines: tests failed.' [email protected]")
Example #15
def main():
    try:
        if len(sys.argv) != 2:
            raise InvalidArgumentError(
                "Please supply a single argument. An example would be 'kittens'"
            )
        else:
            search_plurk(sys.argv[1])

    except Exception as e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })
Example #16
    def sw_excepthook(type, value, tb):
        """Log uncaught exceptions to scraperwiki.sqlite file."""

        global _successful_exit
        _successful_exit = False

        try:
            first_frame_tuple = inspect.getouterframes(tb.tb_frame)[-1]
            (_frame, filename, _lineno, _where, _code, _) = first_frame_tuple

            type_name = type.__module__ + '.' + type.__name__

            message = repr(value)

            write_runlog(filename, ''.join(traceback.format_tb(tb)),
                type_name, message, False, run_id)

            scraperwiki.status('error')
        finally:
            inner_excepthook(type, value, tb)
Example #17
    def sw_excepthook(type, value, tb):
        """Log uncaught exceptions to scraperwiki.sqlite file."""

        global _successful_exit
        _successful_exit = False

        try:
            first_frame_tuple = inspect.getouterframes(tb.tb_frame)[-1]
            (_frame, filename, _lineno, _where, _code, _) = first_frame_tuple

            type_name = type.__module__ + '.' + type.__name__

            message = repr(value)

            write_runlog(filename, ''.join(traceback.format_tb(tb)), type_name,
                         message, False, run_id)

            scraperwiki.status('error')
        finally:
            inner_excepthook(type, value, tb)
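
In Examples #16 and #17 sw_excepthook is defined inside a setup function; the wiring it relies on is roughly the following sketch, where inner_excepthook simply chains to whatever hook was installed before.

import sys

# Keep a reference to the previous hook so sw_excepthook can chain to it.
inner_excepthook = sys.excepthook
sys.excepthook = sw_excepthook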
Example #18
def Main(config_path, **kwargs):
  '''Wrapper.'''
  clean_run = kwargs.get('clean_run', True)
  verbose = kwargs.get('verbose', True)
  debug = kwargs.get('debug', True)

  try:
    for endpoint_name in ['FCS', 'CSI', 'Income']:

      #
      # Clean records from database.
      #
      if clean_run:
        db.CleanTable(table_name=endpoint_name, verbose=verbose)

      #
      # Query WFP for data.
      #
      data = BuildQueue(endpoint_name, config_path, verbose=verbose)
      MakeRequests(data, endpoint_name, config_path, **kwargs)

  except Exception as e:
    print "%s Failed to collect data from WFP." % item('prompt_error')
    scraperwiki.status('error', 'Error collecting data.')
    os.system("mail -s 'WFP APIs: Collector failed.' [email protected]")

    if debug:
      raise

    if verbose:
      print e
  else:

    #
    # Success!
    #
    print "%s All data was collected successfully." % item('prompt_success')
    print "SW Status: Everything seems to be just fine."
    scraperwiki.status('ok')
Example #19
def do_work(limit):
    #TODO: factor into master dict of colnames/css selectors
    scraperwiki.sql.execute("""CREATE TABLE IF NOT EXISTS
      people (id, source_id, scraped, name, headline, distance,
              num_connections, 
              location_name, location_country_code,
              industry, company_name, company_type,
              company_size, company_industry, company_ticker,
              public_profile_url,
              picture_url)""")
    access_token = json.load(open('access_token.json'))['access_token']
    worklist = scraperwiki.sql.select(
      """source.name AS name, source.id AS source_id
        FROM source LEFT JOIN people
        ON source.id = people.source_id ORDER BY scraped
        LIMIT ?""", [limit])
    for person in worklist:
        # print "working on", person
        params = {
          'keywords': person['name'],
          'oauth2_access_token': access_token
        }
        fields = ("id,first-name,last-name,headline,"+
          "distance,num-connections,num-connections-capped,"+
          "location:(name,country:(code)),industry,"+
          "positions:(company:(name,type,size,industry,ticker)),"+
          "public-profile-url,"+
          "picture-url")
        baseurl = "https://api.linkedin.com/v1/people-search:(people:(%s))" % fields
        r = requests.get(baseurl, params=params)
        r.raise_for_status()
        save_first_person(source_id=person['source_id'], xml=r.content)
        progress = scraperwiki.sql.select("""count(*) as source,
          (select count(*)from people) as people from source""")
        progress = progress[0]
        message = "Read %(people)d/%(source)d" % progress
        scraperwiki.status('ok', message)
Example #20
def main(development=False):
  '''Wrapper.'''

  data = collect()

  try:
    #
    # Either collect data or use
    # previously collected data from
    # database.
    #
    if development is False:
      data = collect()
      pdata = patch(data)

    else:
      cursor = scraperwiki.sqlite.execute('SELECT * FROM opennepal_content')
      pdata = []
      for record in cursor['data']:
        pdata.append(dict(zip(cursor['keys'], record)))

    #
    # Create static JSON files.
    #
    export_json(data=pdata)
    scraperwiki.status('ok')


  #
  # Send notification if scraper fails.
  #
  except Exception as e:
    print '%s OpenNepal Scraper failed.' % item('error')
    print e
    scraperwiki.status('error', 'Collection failed.')
    os.system("mail -s 'OpenNepal: Scraper failed.' [email protected]")
Example #21
            'User-agent':
            "Mozilla/5.0"
            "(Macintosh; Intel Mac OS X 10_11_6) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/55.0.2883.95 Safari/537.36",
            'Connection':
            'keep-alive'
        })

        for anno, n_norme in norme_anno.items():
            for k in range(1, n_norme + 1):
                norma_url = "/uri-res/N2Ls?urn:nir:{0};{1}!vig=".format(
                    anno, k)
                print(norma_url)

                # urn e url parziali della norma
                process_permalinks(_get_permalinks(norma_url, session=session),
                                   session=session)
                scraperwiki.status('ok')

        # explore and solve referenced links (first passage)
        referenced_links = set([
            res['Reference'] for res in scraperwiki.sql.select(
                "Reference from Nodes where Scraped = 0")
        ])
        for link in referenced_links:
            process_permalinks(_get_permalinks(_get_relative_url(link),
                                               session=session),
                               session=session)
            scraperwiki.status('ok')
            create.CreateDatasets(dataset_dict=dataset_dict,
                                  hdx_site=p['hdx_site'],
                                  apikey=p['hdx_key'],
                                  verbose=p['verbose'],
                                  update_all_datasets=p['update_all_datasets'])
            create.CreateResources(
                resource_dict=resource_dict,
                hdx_site=p['hdx_site'],
                apikey=p['hdx_key'],
                verbose=p['verbose'],
                update_all_datasets=p['update_all_datasets'])
            # create.CreateGalleryItems(gallery_dict=gallery_dict, hdx_site=p['hdx_site'], apikey=p['hdx_key'], verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])

        except Exception as e:
            print e
            return False


if __name__ == '__main__':

    if Main() != False:
        print '%s OpenNepal scraper registered datasets successfully.\n' % I(
            'success')
        scraperwiki.status('ok')

    else:
        scraperwiki.status('error', 'Failed to register resources.')
        os.system(
            "mail -s 'OpenNepal scraper failed to register datasets' [email protected]"
        )
Example #23
import app.utilities.load as Load

from app.utilities.item import item
from app.collect.collect import collectData
from app.utilities.store_data import storeData

def main():
  '''
  Application wrapper.

  '''
  dir_name = os.path.dirname(os.path.realpath(__file__))
  file = os.path.join(dir_name, 'config', 'config.json')
  config = Load.loadJSONFile(file)

  for endpoint in config['endpoints']:
    data = collectData(endpoint['url'])
    storeData(data, endpoint['name'])


if __name__ == '__main__':
  try:
    main()
    print('{success} Successfully collected OCHA CERF data.'.format(success=item('success')))
    scraperwiki.status('ok')

  except Exception as e:
    print('{failure} Failed to collect OCHA CERF data.'.format(failure=item('error')))
    scraperwiki.status('error', 'Failed to collect data.')


def main():
    """
    Program wrapper.

    """
    tables = ["pblStatsSum", "pblStatsSum4Maps"]
    for t in tables:
        m = mVAM(table=t)

        output = []
        records = m.query()
        for record in records:
            output.append(parse(record))

        store_csv(data=output, path="%s.csv" % t)
        store_sqlite(data=output, table=t)


if __name__ == "__main__":
    try:
        main()
        print("%s Successfully collected mVAM data." % item("success"))
        scraperwiki.status("ok")

    except Exception as e:
        print("%s Failed to collected mVAM data." % item("error"))
        print(e)
        scraperwiki.status("error", "Failed to collect data.")
def update_status():
    status_text = 'Last changed: {}'.format(
        get_most_recent_record('changes', 'datetime'))
    scraperwiki.status('ok', status_text)
import scraperwiki
scraperwiki.status(type='error')
    def test_does_nothing_if_called_outside_box(self):
        scraperwiki.status('ok')
        return

    # proceed if the hash is different, i.e. update
    print "New data from the WHO. Send alert + grab data."
    pushbullet.sendAlert(pushbullet_key, PUSBULLET_PAYLOAD)
    os.system('bash bin/run_scraper.sh')  # run the scraper


def Main(p):
    '''Wrapper.'''

    downloadFile(p)
    checkForAlert(p)



if __name__ == '__main__':
    
    #
    # Error handler for ScraperWiki messages.
    #
    try:
        Main(PATH)
        print "Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Check for new files failed.')
        os.system("mail -s 'WHO Alert failed: unknown error..' [email protected]")
Example #29
from utilities.hdx_format import item
from ors_collect import patch as Patch
from ors_collect import collect as Collect

def Main(patch=True):
  '''Wrapper for main program.'''

  #
  # Collect data.
  #
  Collect.Main()

  #
  # Patch.
  #
  if patch:
    Patch.Main()

if __name__ == '__main__':

  try:
      Main()
      print "SW Status: Everything seems to be just fine."
      scraperwiki.status('ok')

  except Exception as e:
      print e
      scraperwiki.status('error', 'Error collecting data.')
      os.system("echo https://ds-ec2.scraperwiki.com/3zarzzv/0zftw6fzkjxommp/http/log.txt | mail -s 'ORS APIs: Failed collecting data.' [email protected]")
          offset += chunksize
          print('Done: %s' % offset)


def Main():
  '''Wrapper.'''

  ckan = ckanapi.RemoteCKAN(REMOTE_CKAN, apikey=apikey)
  resource = resources[0]
  upload_data_to_datastore(resource['resource_id'], resource)
  downloadResource(PATH)
  updateDatastore(PATH)




if __name__ == '__main__':
  
  #
  # ScraperWiki error handler.
  #
  try:
    runEverything()
    print "SW Status: Everything seems to be just fine."
    scraperwiki.status('ok')

  except Exception as e:
    print e
    scraperwiki.status('error', 'Creating datastore failed')
    os.system("mail -s 'Ebola Case data: creating datastore failed.' [email protected]")
      # Delete resources before running:
      if p['delete_resources']:
        delete.DeleteResources(dataset_dict=dataset_dict, hdx_site=p['hdx_site'], apikey=p['hdx_key'], verbose=p['verbose'])

      if p['update_all_datasets']:
        print('--------------------------------------------------')
        print(color(u" ATTENTION:", "blue", attrs=['bold']) + ' Updating ALL datasets.')
        print('--------------------------------------------------')

      #
      # Create datasets, resources, and gallery items.
      #
      create.CreateDatasets(dataset_dict=dataset_dict, hdx_site=p['hdx_site'], apikey=os.getenv('HDX_KEY'), verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])
      create.CreateResources(resource_dict=resource_dict, hdx_site=p['hdx_site'], apikey=os.getenv('HDX_KEY'), verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])
      # create.CreateGalleryItems(gallery_dict=gallery_dict, hdx_site=p['hdx_site'], apikey=os.getenv('HDX_KEY'), verbose=p['verbose'], update_all_datasets=p['update_all_datasets'])

    except Exception as e:
      print(e)
      return False


if __name__ == '__main__':

  if main() != False:
    print('%s IFPRI scraper finished successfully.\n' % I('success'))
    scraperwiki.status('ok')

  else:
    scraperwiki.status('error', 'Failed to register resources.')
    os.system("mail -s 'IFPRI scraper collector failed' [email protected]")
def runEverything(p):
    # fetch the resources list
    resources = getResources(p)
    print '-------------------------------------'

    # iterating through the provided list of resources
    for i in range(0, len(resources)):
        resource = resources[i]  # getting the right resource
        resource_id = resource['resource_id']  # getting the resource_id
        print "Reading resource id: " + resource_id
        downloadResource(p, resource_id, API_KEY)
        updateDatastore(p, resource_id, resource, API_KEY)
    print '-------------------------------------'
    print 'Done.'
    print '-------------------------------------'


# Error handler for running the entire script
try:
    runEverything(FILE_PATH)
    # if everything ok
    print "ScraperWiki Status: Everything seems to be just fine."
    scraperwiki.status('ok')

except Exception as e:
    print e
    scraperwiki.status('error', 'Creating datastore failed')
    os.system(
        "mail -s 'Fiji Topline: creating datastore failed.' [email protected]"
    )
Example #33

def Main(patch=True):
    '''Wrapper for main program.'''

    #
    # Collect data.
    #
    Collect.Main()

    #
    # Patch.
    #
    if patch:
        Patch.Main()


if __name__ == '__main__':

    try:
        Main()
        print "SW Status: Everything seems to be just fine."
        scraperwiki.status('ok')

    except Exception as e:
        print e
        scraperwiki.status('error', 'Error collecting data.')
        os.system(
            "echo https://ds-ec2.scraperwiki.com/3zarzzv/0zftw6fzkjxommp/http/log.txt | mail -s 'ORS APIs: Failed collecting data.' [email protected]"
        )
Example #34
            raise InvalidArgumentError("Please supply a single argument. An example would be 'kittens'")
        else:
            search_flickr(sys.argv[1])

    except Exception as e:
        scraperwiki.status('error', type(e).__name__)
        print json.dumps({
            'error': {
                'type': type(e).__name__,
                'message': str(e),
                'trace': traceback.format_exc()
            }
        })

    else:
        scraperwiki.status('ok')
        print json.dumps({
            'success': {
                'type': 'ok',
                'message': "Saved Flickr photo information"
            }
        })

def search_flickr(searchvalue):
    favs = flickr.walk(tags=searchvalue, extras="geo")
    rows = []
    for i, photo in enumerate(favs):
        if photo.get('latitude') != '0':
            row = OrderedDict()
            row['id'] = photo.get('id')
            row['title'] = photo.get('title')
Example #35
    def test_does_nothing_if_called_outside_box(self):
        scraperwiki.status('ok')
Example #36
def update_status():
    status_text = 'Latest entry: {}'.format(
        get_most_recent_record('events', 'date'))
    print(status_text)

    scraperwiki.status('ok', status_text)