Example #1
0
def main():
    """
    Command-line entry point for a single gfw-sync layer run.

    Parses --environment, --layer and --verbose, builds the logger at the
    requested verbosity, fetches the layer definition for the requested layer,
    builds and updates the layer, records the update timestamp and finally
    asks the layer to clean up after itself.
    """

    # Parse commandline arguments
    parser = argparse.ArgumentParser(description='Get layer name, environment and verbosity for gfw-sync.')
    parser.add_argument('--environment', '-e', default='DEV', choices=('DEV', 'PROD'),
                        help='the environment/config files to use for this run')
    parser.add_argument('--layer', '-l', required=True,
                        help='the data layer to process; must match a value for tech_title in the config')
    parser.add_argument('--verbose', '-v', default='debug', choices=('debug', 'info', 'warning', 'error'),
                        help='set verbosity level to print and write to file')
    args = parser.parse_args()

    # Instantiate logger; write to {dir}\logs
    # Bound to `log` so the local does not shadow the stdlib `logging` module name
    log = logger.build_logger(args.verbose)

    # Load the tool info once instead of fetching the settings separately for each banner field
    tool_info = settings.get_settings(args.environment)['tool_info']
    log.info("\n{0}\n{1} v{2}\n{0}\n".format('*' * 50, tool_info['name'], tool_info['version']))
    log.critical('Starting | {0}'.format(args.layer))

    # Open the correct sheet of the config table (PROD | DEV) and get the layerdef
    # Config table: https://docs.google.com/spreadsheets/d/1pkJCLNe9HWAHqxQh__s-tYQr9wJzGCb6rmRBPj8yRWI/edit#gid=0
    layerdef = gs.get_layerdef(args.layer, args.environment)

    # Pass the layerdef to the build_layer function
    layer = layer_decision_tree.build_layer(layerdef, args.environment)

    # Update the layer in the output data sources
    layer.update()

    # Update the last-updated timestamp in the config table
    gs.update_gs_timestamp(args.layer, args.environment)

    # Delete scratch workspace
    layer.cleanup()

    log.critical('Finished | {0}'.format(args.layer))
Example #2
0
    def __init__(self, layerdef):
        """
        Populate the layer's attributes from a layer definition
        :param layerdef: mapping holding the config values for one layer, indexed by the keys read below
        """
        logging.debug('Starting layer class')

        def init_from_config(attr, key=None):
            # Create the private backing field first, then assign through the
            # public attribute name using the config value stored under `key`
            # (defaults to the attribute's own name)
            setattr(self, '_' + attr, None)
            setattr(self, attr, layerdef[attr if key is None else key])

        # name and gfw_env come first: the scratch workspace path is built from both
        init_from_config('name', 'tech_title')
        init_from_config('gfw_env')

        self._scratch_workspace = None
        env_paths = settings.get_settings(self.gfw_env)['paths']
        self.scratch_workspace = os.path.join(env_paths['scratch_workspace'], self.name)

        init_from_config('layer_type', 'type')

        # The remaining attributes share their name with the config key.
        # Order is preserved because assignment goes through the public name,
        # which presumably triggers property setters defined elsewhere on the class
        same_named_attrs = (
            'field_map',
            'source',
            'esri_service_output',
            'cartodb_service_output',
            'merge_where_field',
            'delete_features_input_where_clause',
            'archive_output',
            'download_output',
            'esri_mosaics',
            'transformation',
            'global_layer',
            'add_country_value',
            'vector_to_raster_output',
            'tile_cache_service',
            'post_process_script',
        )
        for attr in same_named_attrs:
            init_from_config(attr)
Example #3
0
def main():
    """
    Command-line entry point for a single gfw-sync layer run.

    Parses --environment, --layer and --verbose, builds the logger at the
    requested verbosity, fetches the layer definition for the requested layer,
    builds and updates the layer, then records the update timestamp.
    """

    # Parse commandline arguments
    parser = argparse.ArgumentParser(
        description='Get layer name, environment and verbosity for gfw-sync.')
    parser.add_argument(
        '--environment',
        '-e',
        default='staging',
        choices=('staging', 'prod'),
        help='the environment/config files to use for this run')
    parser.add_argument(
        '--layer',
        '-l',
        required=True,
        help=
        'the data layer to process; must match a value for tech_title in the config'
    )
    parser.add_argument('--verbose',
                        '-v',
                        default='debug',
                        choices=('debug', 'info', 'warning', 'error'),
                        help='set verbosity level to print and write to file')
    args = parser.parse_args()

    # Instantiate logger; write to {dir}\logs
    # Bound to `log` so the local does not shadow the stdlib `logging` module name
    log = logger.build_logger(args.verbose)

    # Load the tool info once instead of fetching the settings separately for each banner field
    tool_info = settings.get_settings(args.environment)['tool_info']
    log.info("\n{0}\n{1} v{2}\n{0}\n".format(
        '*' * 50, tool_info['name'], tool_info['version']))
    log.critical('Starting | {0}'.format(args.layer))

    # Get the layerdef from the config table for the selected environment (staging | prod)
    # Config table: https://docs.google.com/spreadsheets/d/1pkJCLNe9HWAHqxQh__s-tYQr9wJzGCb6rmRBPj8yRWI/edit#gid=0
    layerdef = gs.get_layerdef(args.layer, args.environment)

    # Pass the layerdef to the build_layer function
    layer = layer_decision_tree.build_layer(layerdef, args.environment)

    # Update the layer in the output data sources
    layer.update()

    # Update the last-updated timestamp in the config table
    gs.update_gs_timestamp(args.layer, args.environment)

    log.critical('Finished | {0}'.format(args.layer))
Example #4
0
    def __init__(self, layerdef):
        """
        Populate the layer's attributes from a layer definition
        :param layerdef: mapping holding the config values for one layer, indexed by the keys read below
        """
        logging.debug('Starting layer class')

        # (attribute, layerdef key) pairs that must be set before the scratch
        # workspace, whose path is derived from gfw_env and name
        leading_fields = (
            ('name', 'tech_title'),
            ('gfw_env', 'gfw_env'),
        )

        # (attribute, layerdef key) pairs set afterwards, in their original order
        trailing_fields = (
            ('layer_type', 'type'),
            ('field_map', 'field_map'),
            ('source', 'source'),
            ('esri_service_output', 'esri_service_output'),
            ('cartodb_service_output', 'cartodb_service_output'),
            ('merge_where_field', 'merge_where_field'),
            ('delete_features_input_where_clause',
             'delete_features_input_where_clause'),
            ('archive_output', 'archive_output'),
            ('download_output', 'download_output'),
            ('transformation', 'transformation'),
            ('global_layer', 'global_layer'),
            ('add_country_value', 'add_country_value'),
            ('vector_to_raster_output', 'vector_to_raster_output'),
            ('tile_cache_service', 'tile_cache_service'),
            ('post_process_script', 'post_process_script'),
        )

        # Each attribute gets its private backing field created as None, then
        # the config value is assigned through the public attribute name
        for attr, key in leading_fields:
            setattr(self, '_' + attr, None)
            setattr(self, attr, layerdef[key])

        self._scratch_workspace = None
        scratch_root = settings.get_settings(
            self.gfw_env)['paths']['scratch_workspace']
        self.scratch_workspace = os.path.join(scratch_root, self.name)

        for attr, key in trailing_fields:
            setattr(self, '_' + attr, None)
            setattr(self, attr, layerdef[key])
Example #5
0
    def post_process_script(self, p):
        """
        Store the full path of the post-processing script, or None when no script is given
        :param p: script file name, looked up in the 'postprocess' folder under the
                  environment's root_dir; any falsy value means no script
        Logs an error and exits with status 1 if the script is not at the expected path
        """
        resolved = None

        if p:
            paths = settings.get_settings(self.gfw_env)['paths']
            resolved = os.path.join(paths['root_dir'], 'postprocess', p)

            # A configured script that is missing on disk stops the whole run
            if not os.path.exists(resolved):
                logging.error('Post processing script {0} specified, but not '
                              'in expected location {1}. Exiting'.format(p, resolved))
                sys.exit(1)

        self._post_process_script = resolved
Example #6
0
    def post_process_script(self, p):
        """
        Store the full path of the post-processing script, or None when no script is given
        :param p: script file name, looked up in the 'postprocess' folder under the
                  environment's root_dir; any falsy value means no script
        Logs an error and exits with status 1 if the script is not at the expected path
        """
        # Nothing configured: record the absence and stop here
        if not p:
            self._post_process_script = None
            return

        root_dir = settings.get_settings(self.gfw_env)['paths']['root_dir']
        candidate = os.path.join(root_dir, 'postprocess', p)

        # A configured script that is missing on disk stops the whole run
        if not os.path.exists(candidate):
            message = ('Post processing script {0} specified, but not '
                       'in expected location {1}. Exiting')
            logging.error(message.format(p, candidate))
            sys.exit(1)

        self._post_process_script = candidate
Example #7
0
    def __init__(self, layerdef):
        """
        Populate the datasource's attributes from a layer definition
        :param layerdef: mapping holding the config values for one layer; also kept on the instance as-is
        """
        logging.debug('Starting datasource class')

        self.layerdef = layerdef

        # (attribute, layerdef key) pairs, in the order they must be assigned.
        # Each private backing field is created as None before the value is
        # assigned through the public attribute name
        for attr, key in (('name', 'tech_title'),
                          ('data_source', 'source'),
                          ('gfw_env', 'gfw_env')):
            setattr(self, '_' + attr, None)
            setattr(self, attr, layerdef[key])

        # Downloads land in <scratch_workspace>/downloads/<layer name>
        self._download_workspace = None
        scratch_root = settings.get_settings(self.gfw_env)['paths']['scratch_workspace']
        self.download_workspace = os.path.join(scratch_root, 'downloads', self.name)
Example #8
0
def post_process(layerdef):
    """
    Post-process the FORMA alerts export.

    Copies today's GEE export from Google Cloud Storage into the local
    download workspace, uploads that file to S3, then adds headers to the
    country analysis output on S3.
    :param layerdef: the layerdef; passed through to update_elastic.add_headers_to_s3
    :return: None
    :raises subprocess.CalledProcessError: if the gsutil or aws copy exits with a non-zero status
    """
    logging.debug('Starting PostProcess')

    # Download GEE output to temp workspace
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    gee_path = 'gs://forma-2017/tmp/csv/forma_alerts_2012-01-01_{}.csvee_export.csv'.format(
        today)
    # NOTE(review): always uses the 'prod' settings regardless of the layerdef's environment - confirm intended
    download_workspace = os.path.join(
        settings.get_settings('prod')['paths']['scratch_workspace'],
        'downloads', 'forma')

    # check_call blocks until gsutil has finished and raises if it fails, so
    # the S3 upload below never starts before the download is complete
    gsutil_cmd = 'gsutil cp {0} {1}'.format(gee_path, download_workspace)
    subprocess.check_call(gsutil_cmd, shell=True)
    logging.debug('Csv copied from Google to File')

    # Copy GEE output to S3
    temp_file = os.path.join(download_workspace, os.path.basename(gee_path))
    cmd = ['aws', 's3', 'cp', temp_file, 's3://gfw2-data/alerts-tsv/forma.csv']
    subprocess.check_call(cmd)
    logging.debug('File copied to S3')

    # Charlie to trigger country page analyses
    # >>>>>>>>>>>>>>country analysis<<<<<<<<<<<<<<<

    # Copy output from Country pages down
    # TODO: the hard-coded dated path below will be replaced with:
    #   current_s3_path = update_elastic.get_current_hadoop_output('forma', 's3')
    # today_folder = datetime.datetime.today().strftime('%Y%m%d')
    # current_s3_path = 's3://gfw2-data/alerts-tsv/temp/output-forma-summary-{}/part-'.format(today_folder)
    current_s3_path = 's3://gfw2-data/alerts-tsv/temp/output-forma-summary-20170630/part-'

    # Add headers to country analysis output
    header_text = 'alert_delta,lat,long,country_iso,day,value'
    update_elastic.add_headers_to_s3(layerdef, current_s3_path, header_text)
    logging.debug('headers added to analysis output')
Example #9
0
    def __init__(self, layerdef):
        """
        Populate the datasource's attributes from a layer definition
        :param layerdef: mapping holding the config values for one layer; also kept on the instance as-is
        """
        logging.debug('Starting datasource class')

        self.layerdef = layerdef

        def init_from_config(attr, key):
            # Create the private backing field first, then assign the config
            # value through the public attribute name
            setattr(self, '_' + attr, None)
            setattr(self, attr, layerdef[key])

        init_from_config('name', 'tech_title')
        init_from_config('data_source', 'source')
        init_from_config('download_output', 'download_output')
        init_from_config('carto_table', 'cartodb_service_output')
        init_from_config('gfw_env', 'gfw_env')

        # Downloads land in <scratch_workspace>/downloads/<layer name>
        self._download_workspace = None
        env_paths = settings.get_settings(self.gfw_env)['paths']
        self.download_workspace = os.path.join(
            env_paths['scratch_workspace'], 'downloads', self.name)
Example #10
0
    def get_layer(self):
        """
        Download the source rasters that have new data
        :return: the layerdef, with 'source' replaced by the list of downloaded files,
                 for the layer.update() process
        Exits with status 0 when none of the sources has new data
        """

        source_urls = self.data_source.split(',')

        # always update if it's GLAD, not using s3 bucket system currently
        if self.name == 'umd_landsat_alerts':
            scratch_root = settings.get_settings(
                self.gfw_env)['paths']['scratch_workspace']
            download_glad_gee.download(os.path.join(scratch_root, self.name))

        fresh_urls = self.find_updated_data(source_urls)

        if not fresh_urls:
            # Important for the script that reads the log file and sends an email
            # Including this 'Checked' message will show that we checked the layer but it didn't need updating
            logging.debug(
                'Checked S3 bucket, no new data as compared to last timestamp in gfw-sync2 config'
            )
            logging.critical('Checked | {0}'.format(self.name))
            sys.exit(0)

        # Fetch every updated raster and point the layerdef at the local copies
        self.layerdef['source'] = [
            self.download_file(url, self.download_workspace)
            for url in fresh_urls
        ]

        return self.layerdef