def produce_monthly_climatology(months, year_start, year_end):
    import calendar
    import datetime

    from utils import date_range, get_netCDF_filepath
    # SurfaceStressDataWriter is assumed to be importable from the surrounding module/project.

    year_range = str(year_start) + '-' + str(year_end)

    for month in months:
        label = calendar.month_abbr[month] + '_' + year_range + '_average'

        month_days = []
        for year in range(year_start, year_end + 1):
            n_days = calendar.monthrange(year, month)[1]
            month_days = month_days + date_range(
                datetime.date(year, month, 1),
                datetime.date(year, month, n_days))

        avg_period = str(month).zfill(2)
        # use year_end explicitly; the bare `year` left over from the loop above
        # happens to equal year_end, but only by accident
        tau_fp = get_netCDF_filepath(field_type='monthly_climo',
                                     date=datetime.date(year_end, month, 1),
                                     year_start=year_start,
                                     year_end=year_end)

        surface_stress_dataset = SurfaceStressDataWriter(
            field_type='monthly_climo',
            date=month_days[-1],
            year_start=year_start,
            year_end=year_end)
        surface_stress_dataset.date = month_days[-1]

        surface_stress_dataset.compute_mean_fields(
            month_days, avg_method='partial_data_ok')

        surface_stress_dataset.plot_diagnostic_fields(plot_type='custom',
                                                      custom_label=label)
        surface_stress_dataset.write_fields_to_netcdf()
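
A note on the helper used throughout: nearly every snippet on this page calls a project-local date_range imported from utils, whose implementation is not shown. A minimal sketch consistent with the two-date calls above (both endpoints included, datetime.date objects returned) might look like the following; the real helper in each project may differ.

import datetime

def date_range(start_date, end_date):
    """Return every datetime.date from start_date to end_date, inclusive (sketch only)."""
    n_days = (end_date - start_date).days
    return [start_date + datetime.timedelta(days=n) for n in range(n_days + 1)]

For example, date_range(datetime.date(2020, 1, 1), datetime.date(2020, 1, 3)) would return three dates.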
Example #2
def get_available_courts_between_dates(from_date, to_date):
    timetables = {}
    from_date_obj = datetime.strptime(from_date, '%Y-%m-%d').date()
    to_date_obj = datetime.strptime(to_date, '%Y-%m-%d').date()
    for current_date_obj in date_range(from_date_obj, to_date_obj):
        key = current_date_obj.strftime('%Y-%m-%d')
        timetables[key] = Squash2000.get_timetable(current_date_obj).get_available_courts()
    return jsonify(timetables)
def produce_annual_mean(year):
    dates = date_range(datetime.date(year, 1, 1), datetime.date(year, 12, 31))

    surface_stress_dataset = SurfaceStressDataWriter(field_type='annual',
                                                     date=dates[0])
    surface_stress_dataset.compute_mean_fields(dates,
                                               avg_method='partial_data_ok')
    surface_stress_dataset.plot_diagnostic_fields(plot_type='annual')
    surface_stress_dataset.write_fields_to_netcdf()
def process_month(date_in_month):
    """ Process one month. """
    from utils import date_range

    year = date_in_month.year
    month = date_in_month.month

    n_days = calendar.monthrange(year, month)[1]
    dates = date_range(datetime.date(year, month, 1), datetime.date(year, month, n_days))

    Parallel(n_jobs=16)(delayed(process_day)(datetime.date(year, month, day)) for day in range(1, n_days + 1))

def init_timesheet():
    '''Creates an empty timesheet.'''
    timesheet = {}

    for day in date_range(start_date, end_date + timedelta(days=1)):
        week = find_week(day)
        if week not in timesheet:
            timesheet[week] = {}

        timesheet[week][str(day)] = []

    return timesheet
Example #6
def draw_active_contribs_trends(actives_windows, actives, actives_avg, start_date, end_date):
    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    len_all_dates = len(all_dates)
    max_yval = 0
    for aw, rolling_avg_windows in actives_windows:
        for r_a_w in rolling_avg_windows:
            pyplot.plot(x_vals, actives_avg[aw][r_a_w][-len_all_dates:], '-',
                        label="%d day avg (of %d day total)" % (r_a_w, aw), linewidth=5)
            max_yval = max(max_yval, *actives_avg[aw][r_a_w][-len_all_dates:])
    pyplot.title('Active contributors (as of %s)' % datetime.datetime.now().date())
    pyplot.ylabel('Contributor Count')
    pyplot.legend(loc='upper left')
    x_tick_locs = []
    x_tick_vals = []
    for i, d in enumerate(all_dates):
        if d in RELEASE_DATES:
            pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
    # add the final date as a tick only if it is not too close to the previous tick,
    # keeping the tick locations and labels the same length
    if len(all_dates) - x_tick_locs[-1] > 30:
        x_tick_locs.append(len(all_dates))
        x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs, x_tick_vals, rotation=30, horizontalalignment='right')
    pyplot.grid(b=True, which='both', axis='both')
    pyplot.xlim(-1, len(all_dates) + 1)
    pyplot.ylim(0, max_yval + 5)
    ax = pyplot.gca()
    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)
    fig.set_frameon(False)
    fig.savefig('active_contribs.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # small version
    window = 90
    for aw, rolling_avg_windows in actives_windows:
        for r_a_w in rolling_avg_windows[:1]:  # the first window configured
            pyplot.plot(x_vals[:window], actives_avg[aw][r_a_w][-window:], '-',
                        label="%d day avg (of %d day total)" % (r_a_w, aw), linewidth=3)
    pyplot.grid(b=False, which='both', axis='both')
    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlim(-1, window + 1)
    ax = pyplot.gca()
    ax.set_frame_on(True)
    ax.set_facecolor('black')  # change to (24, 24, 24)
    fig = pyplot.gcf()
    fig.set_size_inches(2, 2./3)
    fig.savefig('active_contribs_small.png', bbox_inches='tight', pad_inches=0)
    pyplot.close()
    def get_value(self):
        for day in date_range(date(2016, 4, 1), date(2016, 10, 1)):
            df = self.read_dataframe(day)
            if df.empty:
                continue
            # iterate over a copy so that removing a duplicate name does not
            # skip the entry that follows it
            for name in list(self.names):
                name_df = df.loc[df['name'] == name]
                if name_df.shape[0] > 1:
                    self.names.remove(name)
                    print('found duplicate value')
                    continue
                else:
                    self.value[name].append(list(name_df.iloc[0]))
Example #8
    def process(self, catagory):
        self.catagory = catagory
        last_date_data = read_dataframe_with_hash(self.start_date, self.catagory)
        for current_date in date_range(self.start_date + timedelta(1), self.end_date):
            current_date_data = read_dataframe(current_date, self.catagory)
            if current_date_data.empty:
                continue
            current_date_data['hash'] = '000000'
            solver = Solver(last_date_data, current_date_data, current_date, self.max_index)
            self.max_index += solver.process()
            solver.write_csv(self.catagory)
            last_date_data = current_date_data
            self.logger.info('{}: data for {} has been processed'.format(datetime.now(), current_date))
        print(self.max_index)
def produce_monthly_mean(date_in_month):
    month = date_in_month.month
    year = date_in_month.year

    date1 = datetime.date(year, month, 1)
    n_days = calendar.monthrange(date1.year, date1.month)[1]
    date2 = datetime.date(year, month, n_days)
    dates = date_range(date1, date2)

    surface_stress_dataset = SurfaceStressDataWriter(field_type='monthly',
                                                     date=dates[0])
    surface_stress_dataset.compute_mean_fields(dates,
                                               avg_method='partial_data_ok')
    surface_stress_dataset.plot_diagnostic_fields(plot_type='monthly')
    surface_stress_dataset.write_fields_to_netcdf()
def process_range(start, end, data_dir):
    dates = date_range(arrow.get(start), arrow.get(end))
    print('Processing dates from {} to {}'.format(start, end))

    path = os.path.join(data_dir, 'vehicle_positions') + '/{}.csv'
    paths = map(lambda day: (path.format(day), arrow.get(day)), dates)

    results = []
    for fpath, day in paths:
        stops = get_metadata(day, 'stops', data_dir)
        schedule = get_metadata(day, 'schedule', data_dir)
        now = arrow.now()
        print('Processing file:', fpath)
        df = process_day(pd.read_csv(fpath), stops, schedule)
        results.append(df)
        print('Process {} in {}s'.format(day, (arrow.now() - now).seconds))

    combined = pd.concat(results)
    combined.to_csv('{}_{}.csv'.format(start, end), index=False)
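
process_range above passes arrow objects to date_range and then re-parses each returned day with arrow.get, which suggests this project's variant yields plain date strings. A hedged sketch under that assumption:

def date_range(start, end):
    """Yield each calendar day between two arrow dates as a 'YYYY-MM-DD' string (sketch only)."""
    n_days = (end.date() - start.date()).days
    for n in range(n_days + 1):
        yield start.shift(days=n).format('YYYY-MM-DD')

# e.g. list(date_range(arrow.get('2020-01-01'), arrow.get('2020-01-03')))
# -> ['2020-01-01', '2020-01-02', '2020-01-03']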
def produce_climatology(year_start, year_end):
    from utils import get_netCDF_filepath

    climo_label = str(year_start) + '-' + str(year_end) + '_average'

    dates = date_range(datetime.date(year_start, 1, 1),
                       datetime.date(year_end, 12, 31))

    surface_stress_dataset = SurfaceStressDataWriter(field_type='climo',
                                                     year_start=year_start,
                                                     year_end=year_end)
    surface_stress_dataset.date = dates[0]

    surface_stress_dataset.compute_mean_fields(dates,
                                               avg_method='partial_data_ok')

    surface_stress_dataset.plot_diagnostic_fields(plot_type='custom',
                                                  custom_label=climo_label)
    surface_stress_dataset.write_fields_to_netcdf()
Example #12
def load(conf_file, DATE):
    conf_dict = {}
    for line in open(conf_file):  # open() rather than the Python 2-only file() builtin
        key, value = line.rstrip().split("=")
        if "$DATE" in value:
            value = value.replace("$DATE", DATE)
        if "DATE_SHIFT" in value:
            reg = re.compile(r'DATE_SHIFT\((\d+), ([+-])\)')
            delta, direction = re.findall(reg, value)[0]
            date = utils.date_shift(DATE, int(delta), direction)
            reg = re.compile(r'DATE_SHIFT\(\d+, [+-]\)')
            value = value.replace(reg.findall(value)[0], date)
        if "DATE_RANGE" in value:
            reg = re.compile(r'DATE_RANGE\((\d+), (\d+), ([+-])\)')
            start_date, delta, direction = re.findall(reg, value)[0]
            dates = utils.date_range(start_date, int(delta), direction)
            reg = re.compile(r'DATE_RANGE\(\d+, \d+, [+-]\)')
            value = value.replace(
                reg.findall(value)[0], "{%s}" % ",".join(dates))
        conf_dict[key] = value
    return conf_dict
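
The load() parser above expands a DATE_RANGE(start, delta, +/-) token by calling utils.date_range(start_date, int(delta), direction), where start_date is the digit string captured by the regex. Below is a hypothetical sketch of that signature; the YYYYMMDD date format is an assumption, not taken from the source.

import datetime

def date_range(start_date, delta, direction):
    """Return `delta` consecutive date strings starting from start_date, moving
    forward for '+' or backward for '-' (hypothetical sketch)."""
    base = datetime.datetime.strptime(start_date, '%Y%m%d')
    step = 1 if direction == '+' else -1
    return [(base + datetime.timedelta(days=step * n)).strftime('%Y%m%d')
            for n in range(delta)]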
Example #13
def todos(method, get, post, user):
    '''Todo plans.'''
    if 'page' in get:
        try:
            page = int(get['page'])
            assert page > 0
        except (ValueError, AssertionError):
            # fall back to page 1 on bad input instead of leaving `page` unbound
            page = 1
    else:
        page = 1

    qs = Todo.todos.filter(user=user)
    if 'range' in get:
        r = get['range']
        if r == 'undone':
            qs = qs.filter(done=False)
        else:
            dates = date_range(r)
            if not isinstance(dates, tuple):
                return []
            qs = qs.filter(created__range=dates)
    elif 'tag' in get:
        t = get['tag'].strip()
        if t:
            qs = Todo.objects.filter(tags__name=t, user=user)
        else:
            return []
    elif 'query' in get:
        t = get['query'].strip()
        if t:
            qs = Todo.objects.filter(content__contains=t, user=user)
        else:
            return []
    else:
        return []
    start = (page - 1) * PAGESIZE
    end = start + PAGESIZE
    return [i.to_dict() for i in qs[start:end]]
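
In todos() above, date_range(r) is expected to turn a range keyword into a (start, end) tuple for Django's created__range lookup, and to return something that is not a tuple for unknown keywords. Only that contract comes from the calling code; the keywords in this sketch are purely hypothetical.

import datetime

def date_range(keyword):
    """Map a range keyword to a (start, end) datetime tuple, or None if unknown.
    The keywords 'today', 'week' and 'month' are hypothetical examples."""
    now = datetime.datetime.now()
    spans = {'today': 1, 'week': 7, 'month': 30}
    if keyword not in spans:
        return None
    return (now - datetime.timedelta(days=spans[keyword]), now)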
Example #15
def main(db_path, debug):
    if debug:
        logger.setLevel(logging.DEBUG)

    db = SqliteDatabase(path=db_path)
    end_date = pendulum.now()
    step = pendulum.Interval(minutes=1000)

    symbols = get_symbols()
    logging.info(f'Found {len(symbols)} symbols')
    for i, symbol in enumerate(symbols, 1):
        # get start date for symbol
        # this is either the last entry from the db
        # or the trading start date (from json file)
        latest_candle_date = db.get_latest_candle_date(symbol)
        if latest_candle_date is None:
            logging.debug('No previous entries in db. Starting from scratch')
            # TODO: handle case when symbol is missing from trading start days
            # e.g. symbol is in symbols.json but not in symbols_trading_start_days.json
            start_date = symbol_start_date(symbol)
        else:
            logging.debug('Found previous db entries. Resuming from latest')
            start_date = latest_candle_date

        logging.info(
            f'{i}/{len(symbols)} | {symbol} | Processing from {start_date.to_datetime_string()}'
        )
        for d1, d2 in date_range(start_date, end_date, step):
            logging.debug(f'{d1} -> {d2}')
            # returns (max) 1000 candles, one for every minute
            candles = get_candles(symbol, d1, d2)
            logging.debug(f'Fetched {len(candles)} candles')
            if candles:
                db.insert_candles(symbol, candles)

            # prevent from api rate-limiting
            time.sleep(3)
    db.close()
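
The candle downloader above iterates "for d1, d2 in date_range(start_date, end_date, step)" with a pendulum interval, i.e. a variant that yields consecutive time windows rather than single dates. A minimal sketch under that assumption:

def date_range(start, end, step):
    """Yield consecutive (window_start, window_end) pairs covering [start, end]
    in increments of `step` (sketch only; works with pendulum datetimes and intervals)."""
    current = start
    while current < end:
        window_end = min(current + step, end)
        yield current, window_end
        current = window_end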
Example #16
def worker(args):
    (
        pred_path,
        am_pm,
        region,
        mask_code,
        out_path,
    ) = args
    db = get_db_session("../data/dbs/wmo_gsod.db")
    dates = date_range(dt.date(1988, 1, 2), dt.date(2019, 1, 1))
    trans = REGION_TO_TRANS[region]
    lon, lat = [trans(x) for x in eg.v1_get_full_grid_lonlat(eg.ML)]
    land = trans(np.load("../data/masks/ft_esdr_land_mask.npy"))
    water = ~land
    non_cc_mask = trans(
        np.load("../data/masks/ft_esdr_non_cold_constrained_mask.npy"))
    invalid = non_cc_mask | water
    cc_mask = ~invalid
    inv_cc_mask = land & ~cc_mask
    mask = None
    if mask_code == CC_MASK:
        mask = cc_mask
    elif mask_code == LAND_MASK:
        mask = land
    else:
        mask = inv_cc_mask

    pred = trans(np.load(pred_path))
    df = validate_against_aws_db(pred,
                                 db,
                                 dates,
                                 lon,
                                 lat,
                                 mask,
                                 am_pm,
                                 progress=False)
    df.to_csv(out_path)
def produce_seasonal_mean(seasons_to_compute, year):
    seasons = {
        'JFM': {
            'date1': datetime.date(year, 1, 1),
            'date2': datetime.date(year, 3, 31),
            'label': 'Summer_JFM_' + str(year) + '_average'
        },
        'AMJ': {
            'date1': datetime.date(year, 4, 1),
            'date2': datetime.date(year, 6, 30),
            'label': 'Fall_AMJ_' + str(year) + '_average'
        },
        'JAS': {
            'date1': datetime.date(year, 7, 1),
            'date2': datetime.date(year, 9, 30),
            'label': 'Winter_JAS_' + str(year) + '_average'
        },
        'OND': {
            'date1': datetime.date(year, 10, 1),
            'date2': datetime.date(year, 12, 31),
            'label': 'Spring_OND_' + str(year) + '_average'
        }
    }

    for s in seasons_to_compute:
        logger.info('s={:s}'.format(s))
        dates = date_range(seasons[s]['date1'], seasons[s]['date2'])

        surface_stress_dataset = SurfaceStressDataWriter(None)
        surface_stress_dataset.date = dates[0]
        surface_stress_dataset.compute_mean_fields(
            dates, avg_method='partial_data_ok')
        surface_stress_dataset.plot_diagnostic_fields(
            plot_type='custom', custom_label=seasons[s]['label'])
        surface_stress_dataset.write_fields_to_netcdf(field_type='seasonal',
                                                      season_str=s)
Example #18
    return contribs_by_date, authors_by_count


def save_commits(data):
    with open(COMMITS_FILENAME, 'wb') as f:
        json.dump(data, f)


if __name__ == '__main__':
    people_by_date = collections.defaultdict(list)
    dates_by_person = collections.defaultdict(list)
    for line in sys.stdin.readlines():
        if not line.strip():
            continue
        name, email, timestamp = line.strip().split('|')
        person = ('%s %s' % (name, email)).decode('utf8')
        person = '%s %s' % (map_one_person(person), email)
        if person.lower() in excluded_authors:
            continue
        ts = dateutil.parser.parse(timestamp).strftime('%Y-%m-%d')
        people_by_date[ts].append(person)
        dates_by_person[person].append(ts)

    # fill in any missing days
    first_date = min(people_by_date.keys())
    for day in date_range(first_date, datetime.datetime.now()):
        if day not in people_by_date:
            people_by_date[day] = []

    save_commits((people_by_date, dates_by_person))
Example #19
def main(ini_path=None,
         overwrite_flag=False,
         delay_time=0,
         gee_key_file=None,
         max_ready=-1,
         cron_flag=False,
         reverse_flag=False,
         update_flag=False):
    """Compute scene Tcorr images by date

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, overwrite existing files if the export dates are the same and
        generate new images (but with different export dates) even if the tile
        lists are the same.  The default is False.
    delay_time : float, optional
        Delay time in seconds between starting export tasks (or checking the
        number of queued tasks, see "max_ready" parameter).  The default is 0.
    gee_key_file : str, None, optional
        Earth Engine service account JSON key file (the default is None).
    max_ready: int, optional
        Maximum number of queued "READY" tasks.  The default is -1, which
        implies no limit to the number of tasks that will be submitted.
    cron_flag: bool, optional
        Not currently implemented.
    reverse_flag : bool, optional
        If True, process dates in reverse order.
    update_flag : bool, optional
        If True, only overwrite scenes with an older model version.

    """
    logging.info('\nCompute scene Tcorr images by date')

    ini = utils.read_ini(ini_path)

    model_name = 'SSEBOP'
    # model_name = ini['INPUTS']['et_model'].upper()

    tmax_name = ini[model_name]['tmax_source']

    export_id_fmt = 'tcorr_scene_{product}_{scene_id}'
    asset_id_fmt = '{coll_id}/{scene_id}'

    tcorr_scene_coll_id = '{}/{}_scene'.format(ini['EXPORT']['export_coll'],
                                               tmax_name.lower())

    try:
        wrs2_tiles = str(ini['INPUTS']['wrs2_tiles'])
        wrs2_tiles = sorted([x.strip() for x in wrs2_tiles.split(',')])
    except KeyError:
        wrs2_tiles = []
        logging.debug('  wrs2_tiles: not set in INI, defaulting to []')
    except Exception as e:
        raise e

    try:
        study_area_extent = str(ini['INPUTS']['study_area_extent']) \
            .replace('[', '').replace(']', '').split(',')
        study_area_extent = [float(x.strip()) for x in study_area_extent]
    except KeyError:
        study_area_extent = None
        logging.debug('  study_area_extent: not set in INI')
    except Exception as e:
        raise e

    # TODO: Add try/except blocks and default values?
    collections = [x.strip() for x in ini['INPUTS']['collections'].split(',')]
    cloud_cover = float(ini['INPUTS']['cloud_cover'])
    min_pixel_count = float(ini['TCORR']['min_pixel_count'])
    # min_scene_count = float(ini['TCORR']['min_scene_count'])

    if (tmax_name.upper() == 'CIMIS'
            and ini['INPUTS']['end_date'] < '2003-10-01'):
        logging.error(
            '\nCIMIS is not currently available before 2003-10-01, exiting\n')
        sys.exit()
    elif (tmax_name.upper() == 'DAYMET'
          and ini['INPUTS']['end_date'] > '2018-12-31'):
        logging.warning('\nDAYMET is not currently available past 2018-12-31, '
                        'using median Tmax values\n')
        # sys.exit()
    # elif (tmax_name.upper() == 'TOPOWX' and
    #         ini['INPUTS']['end_date'] > '2017-12-31'):
    #     logging.warning(
    #         '\nDAYMET is not currently available past 2017-12-31, '
    #         'using median Tmax values\n')
    #     # sys.exit()

    # Extract the model keyword arguments from the INI
    # Set the property name to lower case and try to cast values to numbers
    model_args = {
        k.lower(): float(v) if utils.is_number(v) else v
        for k, v in dict(ini[model_name]).items()
    }
    # et_reference_args = {
    #     k: model_args.pop(k)
    #     for k in [k for k in model_args.keys() if k.startswith('et_reference_')]}

    logging.info('\nInitializing Earth Engine')
    if gee_key_file:
        logging.info(
            '  Using service account key file: {}'.format(gee_key_file))
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('x', key_file=gee_key_file),
                      use_cloud_api=True)
    else:
        ee.Initialize(use_cloud_api=True)

    # Get a Tmax image to set the Tcorr values to
    logging.debug('\nTmax properties')
    tmax_source = tmax_name.split('_', 1)[0]
    tmax_version = tmax_name.split('_', 1)[1]
    if 'MEDIAN' in tmax_name.upper():
        tmax_coll_id = 'projects/earthengine-legacy/assets/' \
                       'projects/usgs-ssebop/tmax/{}'.format(tmax_name.lower())
        tmax_coll = ee.ImageCollection(tmax_coll_id)
        tmax_mask = ee.Image(tmax_coll.first()).select([0]).multiply(0)
    else:
        # TODO: Add support for non-median tmax sources
        raise ValueError('unsupported tmax_source: {}'.format(tmax_name))
    logging.debug('  Collection: {}'.format(tmax_coll_id))
    logging.debug('  Source:  {}'.format(tmax_source))
    logging.debug('  Version: {}'.format(tmax_version))

    logging.debug('\nExport properties')
    export_info = utils.get_info(ee.Image(tmax_mask))
    if 'daymet' in tmax_name.lower():
        # Custom smaller extent for DAYMET focused on CONUS
        export_extent = [-1999750, -1890500, 2500250, 1109500]
        export_shape = [4500, 3000]
        export_geo = [1000, 0, -1999750, 0, -1000, 1109500]
        # Custom medium extent for DAYMET of CONUS, Mexico, and southern Canada
        # export_extent = [-2099750, -3090500, 2900250, 1909500]
        # export_shape = [5000, 5000]
        # export_geo = [1000, 0, -2099750, 0, -1000, 1909500]
        export_crs = export_info['bands'][0]['crs']
    else:
        export_crs = export_info['bands'][0]['crs']
        export_geo = export_info['bands'][0]['crs_transform']
        export_shape = export_info['bands'][0]['dimensions']
        # export_geo = ee.Image(tmax_mask).projection().getInfo()['transform']
        # export_crs = ee.Image(tmax_mask).projection().getInfo()['crs']
        # export_shape = ee.Image(tmax_mask).getInfo()['bands'][0]['dimensions']
        export_extent = [
            export_geo[2], export_geo[5] + export_shape[1] * export_geo[4],
            export_geo[2] + export_shape[0] * export_geo[0], export_geo[5]
        ]
    export_geom = ee.Geometry.Rectangle(export_extent,
                                        proj=export_crs,
                                        geodesic=False)
    logging.debug('  CRS: {}'.format(export_crs))
    logging.debug('  Extent: {}'.format(export_extent))
    logging.debug('  Geo: {}'.format(export_geo))
    logging.debug('  Shape: {}'.format(export_shape))

    if study_area_extent is None:
        if 'daymet' in tmax_name.lower():
            # CGM - For now force DAYMET to a slightly smaller "CONUS" extent
            study_area_extent = [-125, 25, -65, 49]
            # study_area_extent =  [-125, 25, -65, 52]
        elif 'cimis' in tmax_name.lower():
            study_area_extent = [-124, 35, -119, 42]
        else:
            # TODO: Make sure output from bounds is in WGS84
            study_area_extent = tmax_mask.geometry().bounds().getInfo()
        logging.debug(f'\nStudy area extent not set in INI, '
                      f'default to {study_area_extent}')
    study_area_geom = ee.Geometry.Rectangle(study_area_extent,
                                            proj='EPSG:4326',
                                            geodesic=False)

    # Intersect study area with export extent
    export_geom = export_geom.intersection(study_area_geom, 1)
    # logging.debug('Extent: {}'.format(export_geom.bounds().getInfo()))

    # If cell_size parameter is set in the INI,
    # adjust the output cellsize and recompute the transform and shape
    try:
        export_cs = float(ini['EXPORT']['cell_size'])
        export_shape = [
            int(math.ceil(abs((export_shape[0] * export_geo[0]) / export_cs))),
            int(math.ceil(abs((export_shape[1] * export_geo[4]) / export_cs)))
        ]
        export_geo = [
            export_cs, 0.0, export_geo[2], 0.0, -export_cs, export_geo[5]
        ]
        logging.debug('  Custom export cell size: {}'.format(export_cs))
        logging.debug('  Geo: {}'.format(export_geo))
        logging.debug('  Shape: {}'.format(export_shape))
    except KeyError:
        pass

    if not ee.data.getInfo(tcorr_scene_coll_id):
        logging.info('\nExport collection does not exist and will be built'
                     '\n  {}'.format(tcorr_scene_coll_id))
        input('Press ENTER to continue')
        ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, tcorr_scene_coll_id)

    # Get current asset list
    logging.debug('\nGetting GEE asset list')
    asset_list = utils.get_ee_assets(tcorr_scene_coll_id)
    # if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
    #     pprint.pprint(asset_list[:10])

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        logging.debug('  Tasks: {}\n'.format(len(tasks)))
        input('ENTER')

    # TODO: Decide if month and year lists should be applied to scene exports
    # # Limit by year and month
    # try:
    #     month_list = sorted(list(utils.parse_int_set(ini['TCORR']['months'])))
    # except:
    #     logging.info('\nTCORR "months" parameter not set in the INI,'
    #                  '\n  Defaulting to all months (1-12)\n')
    #     month_list = list(range(1, 13))
    # try:
    #     year_list = sorted(list(utils.parse_int_set(ini['TCORR']['years'])))
    # except:
    #     logging.info('\nTCORR "years" parameter not set in the INI,'
    #                  '\n  Defaulting to all available years\n')
    #     year_list = []

    if cron_flag:
        # CGM - This seems like a silly way of getting the date as a datetime
        #   Why am I doing this and not using the commented out line?
        end_dt = datetime.date.today().strftime('%Y-%m-%d')
        end_dt = datetime.datetime.strptime(end_dt, '%Y-%m-%d')
        end_dt = end_dt + datetime.timedelta(days=-4)
        # end_dt = datetime.datetime.today() + datetime.timedelta(days=-1)
        start_dt = end_dt + datetime.timedelta(days=-64)
    else:
        start_dt = datetime.datetime.strptime(ini['INPUTS']['start_date'],
                                              '%Y-%m-%d')
        end_dt = datetime.datetime.strptime(ini['INPUTS']['end_date'],
                                            '%Y-%m-%d')

    if end_dt >= datetime.datetime.today():
        logging.debug('End Date:   {} - setting end date to current '
                      'date'.format(end_dt.strftime('%Y-%m-%d')))
        end_dt = datetime.datetime.today()
    if start_dt < datetime.datetime(1984, 3, 23):
        logging.debug('Start Date: {} - no Landsat 5+ images before '
                      '1984-03-23'.format(start_dt.strftime('%Y-%m-%d')))
        start_dt = datetime.datetime(1984, 3, 23)
    start_date = start_dt.strftime('%Y-%m-%d')
    end_date = end_dt.strftime('%Y-%m-%d')
    logging.debug('Start Date: {}'.format(start_date))
    logging.debug('End Date:   {}\n'.format(end_date))
    if start_dt > end_dt:
        raise ValueError('Start date must be before end date')

    # if update_flag:
    #     assets_info = utils.get_info(ee.ImageCollection(
    #         tcorr_scene_coll_id).filterDate(start_date, end_date))
    #     asset_props = {f'{scene_coll_id}/{x["properties"]["system:index"]}':
    #                        x['properties']
    #                    for x in assets_info['features']}
    # else:
    #     asset_props = {}

    for export_dt in sorted(utils.date_range(start_dt, end_dt),
                            reverse=reverse_flag):
        export_date = export_dt.strftime('%Y-%m-%d')
        next_date = (export_dt +
                     datetime.timedelta(days=1)).strftime('%Y-%m-%d')

        # # Uncomment to apply month and year list filtering
        # if month_list and export_dt.month not in month_list:
        #     logging.debug(f'Date: {export_date} - month not in INI - skipping')
        #     continue
        # elif year_list and export_dt.year not in year_list:
        #     logging.debug(f'Date: {export_date} - year not in INI - skipping')
        #     continue

        logging.info(f'Date: {export_date}')

        model_obj = ssebop.Collection(
            collections=collections,
            start_date=export_date,
            end_date=next_date,
            cloud_cover_max=cloud_cover,
            geometry=export_geom,
            model_args=model_args,
            # filter_args=filter_args,
        )
        landsat_coll = model_obj.overpass(variables=['ndvi'])
        # pprint.pprint(landsat_coll.aggregate_array('system:id').getInfo())
        # input('ENTER')

        try:
            image_id_list = landsat_coll.aggregate_array('system:id').getInfo()
        except Exception as e:
            logging.warning('  Error getting image ID list, skipping date')
            logging.debug(f'  {e}')
            continue

        if update_flag:
            assets_info = utils.get_info(
                ee.ImageCollection(tcorr_scene_coll_id).filterDate(
                    export_date, next_date))
            asset_props = {
                f'{tcorr_scene_coll_id}/{x["properties"]["system:index"]}':
                x['properties']
                for x in assets_info['features']
            }
        else:
            asset_props = {}

        # Sort by path/row
        for image_id in sorted(image_id_list,
                               key=lambda k: k.split('/')[-1].split('_')[-2],
                               reverse=True):
            coll_id, scene_id = image_id.rsplit('/', 1)

            wrs2_path = int(scene_id[5:8])
            wrs2_row = int(scene_id[8:11])
            wrs2_tile = 'p{:03d}r{:03d}'.format(wrs2_path, wrs2_row)
            if wrs2_tiles and wrs2_tile not in wrs2_tiles:
                logging.debug('  Not in wrs2_tiles, skipping')
                continue
            else:
                logging.info(f'{scene_id}')

            export_id = export_id_fmt.format(product=tmax_name.lower(),
                                             scene_id=scene_id)
            logging.debug(f'  Export ID: {export_id}')

            asset_id = asset_id_fmt.format(coll_id=tcorr_scene_coll_id,
                                           scene_id=scene_id)
            logging.debug(f'  Asset ID: {asset_id}')

            if update_flag:

                def version_number(version_str):
                    return list(map(int, version_str.split('.')))

                if export_id in tasks.keys():
                    logging.info('  Task already submitted, skipping')
                    continue
                # In update mode only overwrite if the version is old
                if asset_props and asset_id in asset_props.keys():
                    model_ver = version_number(ssebop.__version__)
                    asset_ver = version_number(
                        asset_props[asset_id]['model_version'])

                    if asset_ver < model_ver:
                        logging.info('  Asset model version is old, removing')
                        try:
                            ee.data.deleteAsset(asset_id)
                        except:
                            logging.info('  Error removing asset, skipping')
                            continue
                    else:
                        logging.info('  Asset is up to date, skipping')
                        continue
            elif overwrite_flag:
                if export_id in tasks.keys():
                    logging.debug('  Task already submitted, cancelling')
                    ee.data.cancelTask(tasks[export_id]['id'])
                # This is intentionally not an "elif" so that a task can be
                # cancelled and an existing image/file/asset can be removed
                if asset_id in asset_list:
                    logging.debug('  Asset already exists, removing')
                    ee.data.deleteAsset(asset_id)
            else:
                if export_id in tasks.keys():
                    logging.debug('  Task already submitted, exiting')
                    continue
                elif asset_id in asset_list:
                    logging.debug('  Asset already exists, skipping')
                    continue

            image = ee.Image(image_id)
            # TODO: Will need to be changed for SR or use from_image_id()
            t_obj = ssebop.Image.from_landsat_c1_toa(image_id, **model_args)
            t_stats = ee.Dictionary(t_obj.tcorr_stats) \
                .combine({'tcorr_p5': 0, 'tcorr_count': 0}, overwrite=False)
            tcorr = ee.Number(t_stats.get('tcorr_p5'))
            count = ee.Number(t_stats.get('tcorr_count'))
            index = ee.Algorithms.If(count.gte(min_pixel_count), 0, 9)

            # Write an empty image if the pixel count is too low
            tcorr_img = ee.Algorithms.If(count.gte(min_pixel_count),
                                         tmax_mask.add(tcorr),
                                         tmax_mask.updateMask(0))

            # Clip to the Landsat image footprint
            output_img = ee.Image(tcorr_img).clip(image.geometry())

            # Clear the transparency mask
            output_img = output_img.updateMask(output_img.unmask(0)) \
                .rename(['tcorr']) \
                .set({
                    'CLOUD_COVER': image.get('CLOUD_COVER'),
                    'CLOUD_COVER_LAND': image.get('CLOUD_COVER_LAND'),
                    # 'SPACECRAFT_ID': image.get('SPACECRAFT_ID'),
                    'coll_id': coll_id,
                    # 'cycle_day': ((export_dt - cycle_base_dt).days % 8) + 1,
                    'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'),
                    'date': export_dt.strftime('%Y-%m-%d'),
                    'doy': int(export_dt.strftime('%j')),
                    'model_name': model_name,
                    'model_version': ssebop.__version__,
                    'month': int(export_dt.month),
                    'scene_id': scene_id,
                    'system:time_start': image.get('system:time_start'),
                    'tcorr_value': tcorr,
                    'tcorr_index': index,
                    'tcorr_pixel_count': count,
                    'tmax_source': tmax_source.upper(),
                    'tmax_version': tmax_version.upper(),
                    'wrs2_path': wrs2_path,
                    'wrs2_row': wrs2_row,
                    'wrs2_tile': wrs2_tile,
                    'year': int(export_dt.year),
                })
            # pprint.pprint(output_img.getInfo()['properties'])
            # input('ENTER')

            logging.debug('  Building export task')
            task = ee.batch.Export.image.toAsset(
                image=output_img,
                description=export_id,
                assetId=asset_id,
                crs=export_crs,
                crsTransform='[' + ','.join(list(map(str, export_geo))) + ']',
                dimensions='{0}x{1}'.format(*export_shape),
            )

            logging.info('  Starting export task')
            utils.ee_task_start(task)

        # Pause before starting the next date (not export task)
        utils.delay_task(delay_time, max_ready)
        logging.debug('')
Example #20
    def build_r_0_arr(self):
        """Returns an array of the reproduction numbers (R) for each day.

        Each element in the array represents a single day in the simulation.
            For example, if self.first_date is 2020-03-01 and self.projection_end_date
            is 2020-09-01, then R_0_ARR[10] would be the R value on 2020-03-11.

        Full description at: https://covid19-projections.com/about/#effective-reproduction-number-r
            and https://covid19-projections.com/model-details/#modeling-the-r-value

        We use three different R values: R0, post-mitigation R, and reopening R.
            We use an inverse logistic/sigmoid function to smooth the transition between
            the three R values.

        """

        reopen_r = self.get_reopen_r()
        assert reopen_r >= self.LOCKDOWN_R_0, 'Reopen R must be >= lockdown R'
        assert 0.5 <= self.LOCKDOWN_FATIGUE <= 1.5, self.LOCKDOWN_FATIGUE

        reopen_date_shift = self.REOPEN_DATE + \
            datetime.timedelta(days=int(self.REOPEN_SHIFT_DAYS) + DEFAULT_REOPEN_SHIFT_DAYS)
        fatigue_idx = self.inflection_day_idx + DAYS_UNTIL_LOCKDOWN_FATIGUE
        reopen_idx = self.get_day_idx_from_date(reopen_date_shift)
        lockdown_reopen_midpoint_idx = (self.inflection_day_idx +
                                        reopen_idx) // 2

        if self.LOCKDOWN_R_0 <= 1:
            # we wait longer before applying the post-reopening decay to allow for
            # longer reopening time (since R_t <= 1)
            days_until_post_reopening = 30
        else:
            days_until_post_reopening = 15
        post_reopening_idx = reopen_idx + days_until_post_reopening
        fall_start_idx = self.get_day_idx_from_date(FALL_START_DATE_NORTH) - 30

        sig_lockdown = get_transition_sigmoid(self.inflection_day_idx,
                                              self.RATE_OF_INFLECTION,
                                              self.INITIAL_R_0,
                                              self.LOCKDOWN_R_0)
        sig_fatigue = get_transition_sigmoid(fatigue_idx,
                                             0.2,
                                             0,
                                             self.LOCKDOWN_FATIGUE - 1,
                                             check_values=False)
        sig_reopen = get_transition_sigmoid(reopen_idx, 0.2, self.LOCKDOWN_R_0,
                                            reopen_r)

        dates = utils.date_range(self.first_date, self.projection_end_date)
        assert len(dates) == self.N

        # how much to multiply the reopen R by to get to the equilibrium R (max 0.9)
        min_post_reopening_total_decay = min(
            0.9, self.post_reopening_equilibrium_r / reopen_r)

        R_0_ARR = [self.INITIAL_R_0]
        for day_idx in range(1, self.N):
            if day_idx < lockdown_reopen_midpoint_idx:
                r_t = sig_lockdown(day_idx)
            else:
                post_reopening_total_decay = fall_r_mult = 1

                if day_idx > post_reopening_idx:
                    assert day_idx > reopen_idx, day_idx
                    post_reopening_total_decay = min(
                        1.1,
                        max(
                            min_post_reopening_total_decay,
                            self.post_reopening_r_decay**(day_idx -
                                                          post_reopening_idx)))
                assert 0 < post_reopening_total_decay < 2, post_reopening_total_decay

                if day_idx > fall_start_idx:
                    fall_r_mult = max(
                        0.9,
                        min(1.2,
                            self.fall_r_multiplier**(day_idx -
                                                     fall_start_idx)))
                assert 0.9 <= fall_r_mult <= 1.2, fall_r_mult

                r_t = sig_reopen(
                    day_idx) * post_reopening_total_decay * fall_r_mult

            r_t *= 1 + sig_fatigue(day_idx)

            # Make sure R is stable
            if day_idx > reopen_idx and abs(r_t / R_0_ARR[-1] - 1) > 0.1:
                assert False, f'R changed too quickly: {day_idx} {R_0_ARR[-1]} -> {r_t} {R_0_ARR}'

            R_0_ARR.append(r_t)

        assert len(R_0_ARR) == self.N
        self.reopen_idx = reopen_idx

        return R_0_ARR
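
build_r_0_arr above leans on get_transition_sigmoid(midpoint_idx, rate, start_value, end_value) to blend between R values, per the docstring's "inverse logistic/sigmoid" description. A plausible sketch consistent with how it is called here; the actual covid19-projections implementation may differ.

import numpy as np

def get_transition_sigmoid(inflection_idx, rate_of_inflection, start_value, end_value,
                           check_values=True):
    """Return f(day_idx) that moves smoothly from start_value to end_value,
    centered on inflection_idx (sketch only)."""
    if check_values:
        assert 0 < rate_of_inflection <= 1, rate_of_inflection

    def sigmoid(day_idx):
        # logistic curve: ~start_value well before the inflection, ~end_value well after
        frac = 1.0 / (1.0 + np.exp(-rate_of_inflection * (day_idx - inflection_idx)))
        return start_value + (end_value - start_value) * frac

    return sigmoid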
Example #21
def draw_contrib_activity_graph(dates_by_person, start_date, end_date,
                                extra_window):
    # this graph will show a little bit of the future
    end_date = datetime.datetime.strptime(end_date[:10],
                                          '%Y-%m-%d') + extra_window
    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    graphable_data = {}
    order = []
    for person, data in dates_by_person.iteritems():
        first_day = '9999-99-99'
        last_day = '0000-00-00'
        for key in data:
            first_day = min(first_day, min(data[key]))
            last_day = max(last_day, max(data[key]))
        if datetime.datetime.strptime(last_day, '%Y-%m-%d') < start_date:
            continue
        if datetime.datetime.strptime(first_day, '%Y-%m-%d') > end_date:
            continue
        order.append((first_day, last_day, person))
    order.sort(reverse=True)
    for first_day, last_day, person in order:
        review_data = []
        commit_data = []
        cumulative_data = []
        sparse_cumulative_data = []
        yval = len(graphable_data)
        for date in all_dates:
            person_data = dates_by_person[person]
            active_day = False
            if date in person_data['contribs']:
                commit_data.append(yval)
                active_day = True
            else:
                commit_data.append(None)
            if date in person_data['reviews']:
                review_data.append(yval)
                active_day = True
            else:
                review_data.append(None)
            if first_day <= date <= last_day:
                cumulative_data.append(yval)
            else:
                cumulative_data.append(None)
            if active_day:
                sparse_cumulative_data.append(yval)
            else:
                sparse_cumulative_data.append(None)
        lens = map(len, [
            commit_data, review_data, cumulative_data, sparse_cumulative_data,
            x_vals
        ])
        assert len(set(lens)) == 1, '%r %s' % (lens, person)
        graphable_data[person] = (yval, commit_data, review_data,
                                  cumulative_data, sparse_cumulative_data)

    person_labels = []
    person_active = []
    limited_all_dates_look_back = 180
    for person, (yval, commit_data, review_data, cumulative_data,
                 sparse_cumulative_data) in graphable_data.iteritems():
        name = person.split('<', 1)[0].strip()
        person_labels.append((yval, name))
        how_many_days_active_total = sparse_cumulative_data.count(yval)
        how_many_days_active_limited = sparse_cumulative_data[
            -limited_all_dates_look_back:].count(yval)
        how_many_days_active_limited2 = sparse_cumulative_data[
            -limited_all_dates_look_back *
            2:-limited_all_dates_look_back].count(yval)
        try:
            days_since_first_commit = len(x_vals) - commit_data.index(yval)
        except ValueError:
            days_since_first_commit = 0
        try:
            days_since_first_review = len(x_vals) - review_data.index(yval)
        except ValueError:
            days_since_first_review = 0
        days_since_first = max(days_since_first_review,
                               days_since_first_commit)
        if days_since_first <= 0:
            # you didn't make the filtering cutoff
            continue
        # since your first commit, how much of the life of the project have you been active?
        percent_active = how_many_days_active_total / float(days_since_first)
        cumulative_percent_active = how_many_days_active_limited / float(
            limited_all_dates_look_back)
        cumulative_percent_active2 = how_many_days_active_limited2 / float(
            limited_all_dates_look_back)
        weight = cumulative_percent_active + (cumulative_percent_active2 * .25)
        person_active.append((name, weight))
        rcolor = percent_active * 0xff
        bcolor = 0
        gcolor = 0
        activity_color = '#%02x%02x%02x' % (rcolor, gcolor, bcolor)
        review_color = '#%02x%02x%02x' % (106, 171, 62)
        commit_color = '#%02x%02x%02x' % (37, 117, 195)

        pyplot.plot(x_vals,
                    cumulative_data,
                    linestyle='-',
                    label=person,
                    linewidth=3,
                    solid_capstyle="butt",
                    alpha=1.0,
                    color=activity_color)
        pyplot.plot(x_vals,
                    commit_data,
                    linestyle='-',
                    label=person,
                    linewidth=10,
                    solid_capstyle="butt",
                    alpha=1.0,
                    color=commit_color)
        pyplot.plot(x_vals,
                    review_data,
                    linestyle='-',
                    label=person,
                    linewidth=5,
                    solid_capstyle="butt",
                    alpha=1.0,
                    color=review_color)
        label_xval = cumulative_data.index(yval) - 3  # move over some for room
        pyplot.annotate(name,
                        xy=(label_xval, yval - 0.25),
                        horizontalalignment='right',
                        color=activity_color)
    pyplot.title('Contributor Activity (as of %s)' %
                 datetime.datetime.now().date())
    pyplot.yticks([], [])
    person_labels.sort()
    pyplot.ylim(-1, person_labels[-1][0] + 1)
    x_tick_locs = []
    x_tick_vals = []
    today = str(datetime.datetime.now())[:10]
    for i, d in enumerate(all_dates):
        if d in RELEASE_DATES:
            pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
        if d == today:
            pyplot.axvline(x=i, alpha=0.8, color='#cf9b46', linewidth=2)
    x_tick_locs.append(len(all_dates))
    x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs,
                  x_tick_vals,
                  rotation=30,
                  horizontalalignment='right')
    pyplot.xlim(-5, x_tick_locs[-1] + 20)
    pyplot.grid(b=True, which='both', axis='x')
    vertical_size_per_person = 0.3
    vertical_size = vertical_size_per_person * len(person_labels)
    horizontal_size_per_day = 0.02
    horizontal_size = horizontal_size_per_day * len(x_vals)
    ax = pyplot.gca()
    ax.set_frame_on(False)
    fig = pyplot.gcf()
    fig.set_size_inches(horizontal_size, vertical_size)
    fig.savefig('contrib_activity.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # maybe a bad place, but we have the percent active per person, so write it out
    with open(PERCENT_ACTIVE_FILENAME, 'wb') as f:
        for pers, perc in person_active:
            f.write('%s:%s\n' % (pers, perc))
Example #22
def draw_active_contributors_predictions(people_by_date, start_date, end_date):
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)

    all_dates = list(date_range(start_date, end_date))
    days_to_predict = 365 * 2

    contrib_data = {"ds": all_dates, "y": []}
    for d in all_dates:
        todays_total = set()
        todays_reviewers = people_by_date[d]['reviews']
        todays_authors = people_by_date[d]['contribs']
        todays_total.update(todays_reviewers)
        todays_total.update(todays_authors)
        t = len(todays_total)
        # if t == 0:
        #     t = None
        contrib_data["y"].append(t)

    dataframes = pandas.DataFrame.from_dict(contrib_data)
    dataframes["cap"] = 25
    dataframes["floor"] = 0
    prophet = Prophet(
        changepoint_prior_scale=7.5,
        interval_width=0.025,
        daily_seasonality="auto",
        weekly_seasonality="auto",
        yearly_seasonality="auto",
        changepoint_range=0.80,
        seasonality_prior_scale=2.0,
        # uncertainty_samples=5000,
        growth="logistic")
    prophet.fit(dataframes)
    forecast = prophet.make_future_dataframe(periods=days_to_predict, freq="D")
    forecast["cap"] = 25
    forecast["floor"] = 0
    forecast = prophet.predict(forecast)

    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    fig, ax = pyplot.subplots(1, 1, figsize=(10, 8))

    # ax.plot(
    #     [d.strftime("%Y-%m%d") for d in forecast["ds"]],
    #     forecast["yhat"],
    #     linestyle="-",
    #     marker="None",
    # )
    # ax.plot(
    #     contrib_data["ds"],
    #     contrib_data["y"],
    #     marker="o",
    #     markersize=1.75,
    #     linestyle="None",
    # )
    # ax.fill_between(
    #     [d.strftime("%Y-%m%d") for d in forecast["ds"]],
    #     forecast["yhat_upper"],
    #     forecast["yhat_lower"],
    #     alpha=0.5,
    # )

    fig = prophet.plot(forecast)
    add_changepoints_to_plot(fig.gca(), prophet, forecast)

    last_day_of_fact = datetime.datetime.strptime(
        end_date, "%Y-%m-%d") + datetime.timedelta(hours=24)
    last_day_of_prediction = datetime.datetime.strptime(
        end_date, "%Y-%m-%d") + datetime.timedelta(hours=24 * days_to_predict)
    # x_tick_locs = []
    # x_tick_vals = []
    # all_dates = list(date_range(start_date, last_day_of_prediction))
    # for i, d in enumerate(all_dates):
    #     # if d in RELEASE_DATES:
    #     #     ax.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
    #     if not i % 60:
    #         x_tick_locs.append(i)
    #         x_tick_vals.append(d)
    # x_tick_locs.append(len(all_dates))
    # if len(all_dates) - x_tick_locs[-1] > 30:
    #     x_tick_vals.append(all_dates[-1])
    # ax.xticks(x_tick_locs, x_tick_vals, rotation=30, horizontalalignment='right')
    # # ax.grid(b=True, which='both', axis='both')
    # ax.xlim(-1, x_tick_locs[-1] + 1)
    # ax.ylim(0, x_tick_locs[-1] + 1)
    # ldi = all_dates.index(last_day_of_fact.strftime("%Y-%m%d"))
    labels = ax.get_xticklabels()
    pyplot.setp(labels, rotation=30, horizontalalignment="right")
    pyplot.axvline(x=last_day_of_fact, color="black", linestyle="--")
    pyplot.title("Contributor Count Predictions")
    # pyplot.ylim(bottom=0)
    # pyplot.tight_layout()
    pyplot.style.use("fivethirtyeight")
    matplotlib.rcParams["font.sans-serif"] = "B612"
    matplotlib.rcParams["font.family"] = "B612"
    matplotlib.rcParams["axes.labelsize"] = 10
    matplotlib.rcParams["xtick.labelsize"] = 8
    matplotlib.rcParams["ytick.labelsize"] = 8
    matplotlib.rcParams["text.color"] = "k"

    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)

    fig.savefig("contrib_predictions.png")

    fig2 = prophet.plot_components(forecast)
    fig2.savefig("contrib_predictions_components.png")
if __name__ == '__main__':

    # init logging based on user params
    function = FACTORY.get(sys.argv[1])
    if function is None:
        logger.error("can't find function: {}".format(sys.argv[1]))

    logger.info('running {}'.format(sys.argv[1]))

    #prep data
    min_date = datetime.date(2014, 1, 1)
    max_date = datetime.date(2015, 8, 23)
    url = 'data_for_financial_sentiment_paper.zip'

    ts = Tseries(date_range(min_date, max_date))

    #trading days first so we can exclude non trading days
    ts.add(StockPriceDataFeeder(url).sp_data())
    ts.remove_null()

    ts.add(LMNDataReader5(url).nt_data())
    ts.add(FFDataReader(url).FF_data())
    ts.add(CRSPDataFeeder(url).crsp_data())

    ts.remove_null()
    ts.pad_missing()
    ts.dummy_vars(lambda x: x.weekday() == 0, 'NWD')
    ts.dummy_vars(lambda x: x.weekday() == 4, 'friday')
    ts.dummy_vars(lambda x: x.month == 1, 'january')
Exemple #24
0
def draw_contrib_activity_graph(dates_by_person, start_date, end_date, extra_window):
    # this graph will show a little bit of the future
    end_date = datetime.datetime.strptime(end_date[:10], '%Y-%m-%d') + extra_window
    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    graphable_data = {}
    order = []
    for person, data in dates_by_person.iteritems():
        first_day = '9999-99-99'
        last_day = '0000-00-00'
        for key in data:
            first_day = min(first_day, min(data[key]))
            last_day = max(last_day, max(data[key]))
        if datetime.datetime.strptime(last_day, '%Y-%m-%d') < start_date:
            continue
        if datetime.datetime.strptime(first_day, '%Y-%m-%d') > end_date:
            continue
        order.append((first_day, last_day, person))
    order.sort(reverse=True)
    for first_day, last_day, person in order:
        review_data = []
        commit_data = []
        cumulative_data = []
        sparse_cumulative_data = []
        yval = len(graphable_data)
        for date in all_dates:
            person_data = dates_by_person[person]
            active_day = False
            if date in person_data['contribs']:
                commit_data.append(yval)
                active_day = True
            else:
                commit_data.append(None)
            if date in person_data['reviews']:
                review_data.append(yval)
                active_day = True
            else:
                review_data.append(None)
            if first_day <= date <= last_day:
                cumulative_data.append(yval)
            else:
                cumulative_data.append(None)
            if active_day:
                sparse_cumulative_data.append(yval)
            else:
                sparse_cumulative_data.append(None)
        lens = map(len, [commit_data, review_data, cumulative_data, sparse_cumulative_data, x_vals])
        assert len(set(lens)) == 1, '%r %s' % (lens, person)
        graphable_data[person] = (yval, commit_data, review_data, cumulative_data, sparse_cumulative_data)

    person_labels = []
    person_active = []
    limited_all_dates_look_back = 180
    for person, (yval, commit_data, review_data, cumulative_data, sparse_cumulative_data) in graphable_data.iteritems():
        name = person.split('<', 1)[0].strip()
        person_labels.append((yval, name))
        how_many_days_active_total = sparse_cumulative_data.count(yval)
        how_many_days_active_limited = sparse_cumulative_data[-limited_all_dates_look_back:].count(yval)
        how_many_days_active_limited2 = sparse_cumulative_data[-limited_all_dates_look_back*2:-limited_all_dates_look_back].count(yval)
        try:
            days_since_first_commit = len(x_vals) - commit_data.index(yval)
        except ValueError:
            days_since_first_commit = 0
        try:
            days_since_first_review = len(x_vals) - review_data.index(yval)
        except ValueError:
            days_since_first_review = 0
        days_since_first = max(days_since_first_review, days_since_first_commit)
        if days_since_first <= 0:
            # you didn't make the filtering cutoff
            continue
        # since your first commit, how much of the life of the project have you been active?
        percent_active = how_many_days_active_total / float(days_since_first)
        cumulative_percent_active = how_many_days_active_limited / float(limited_all_dates_look_back)
        cumulative_percent_active2 = how_many_days_active_limited2 / float(limited_all_dates_look_back)
        weight = cumulative_percent_active + (cumulative_percent_active2 * .25)
        person_active.append((name, weight))
        rcolor = percent_active * 0xff
        bcolor = 0
        gcolor = 0
        activity_color = '#%02x%02x%02x' % (rcolor, gcolor, bcolor)
        review_color = '#%02x%02x%02x' % (106, 171, 62)
        commit_color = '#%02x%02x%02x' % (37, 117, 195)

        pyplot.plot(x_vals, cumulative_data, linestyle='-',
                    label=person, linewidth=3, solid_capstyle="butt",
                    alpha=1.0, color=activity_color)
        pyplot.plot(x_vals, commit_data, linestyle='-',
                    label=person, linewidth=10, solid_capstyle="butt",
                    alpha=1.0, color=commit_color)
        pyplot.plot(x_vals, review_data, linestyle='-',
                    label=person, linewidth=5, solid_capstyle="butt",
                    alpha=1.0, color=review_color)
        label_xval = cumulative_data.index(yval) - 3  # move over some for room
        pyplot.annotate(name, xy=(label_xval, yval - 0.25), horizontalalignment='right', color=activity_color)
    pyplot.title('Contributor Activity (as of %s)' % datetime.datetime.now().date())
    pyplot.yticks([], [])
    person_labels.sort()
    pyplot.ylim(-1, person_labels[-1][0] + 1)
    x_tick_locs = []
    x_tick_vals = []
    today = str(datetime.datetime.now())[:10]
    for i, d in enumerate(all_dates):
        if d in RELEASE_DATES:
            pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
        if d == today:
            pyplot.axvline(x=i, alpha=0.8, color='#cf9b46', linewidth=2)
    x_tick_locs.append(len(all_dates))
    x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs, x_tick_vals, rotation=30, horizontalalignment='right')
    pyplot.xlim(-5, x_tick_locs[-1] + 20)
    pyplot.grid(b=True, which='both', axis='x')
    vertical_size_per_person = 0.3
    vertical_size = vertical_size_per_person * len(person_labels)
    horizontal_size_per_day = 0.02
    horizontal_size = horizontal_size_per_day * len(x_vals)
    ax = pyplot.gca()
    ax.set_frame_on(False)
    fig = pyplot.gcf()
    fig.set_size_inches(horizontal_size, vertical_size)
    fig.savefig('contrib_activity.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # maybe a bad place, but we have the percent active per person, so write it out
    with open(PERCENT_ACTIVE_FILENAME, 'wb') as f:
        for pers, perc in person_active:
            f.write('%s:%s\n' % (pers, perc))
def main(ini_path=None, overwrite_flag=False, delay=0, key=None):
    """Test for differences in Tcorr from real-time and Collection 1

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    delay : float, optional
        Delay time between each export task (the default is 0).
    key : str, optional
        File path to an Earth Engine json key file (the default is None).

    """
    logging.info('\nTest Real Time Tcorr')

    # Hardcoding for now...
    tcorr_stats_path = r'C:\Users\mortonc\Google Drive\SSEBop\tcorr_realtime\tcorr_stats.csv'
    # tcorr_stats_path = r'C:\Projects\openet-ssebop\tcorr\tcorr_stats.csv'

    ini = utils.read_ini(ini_path)

    model_name = 'SSEBOP'
    # model_name = ini['INPUTS']['et_model'].upper()

    logging.info('\nInitializing Earth Engine')
    if key:
        logging.info('  Using service account key file: {}'.format(key))
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('deadbeef', key_file=key))
    else:
        ee.Initialize()

    # Get a Tmax image to set the Tcorr values to
    logging.debug('\nTmax properties')
    tmax_name = ini[model_name]['tmax_source']
    tmax_source = tmax_name.split('_', 1)[0]
    tmax_version = tmax_name.split('_', 1)[1]
    tmax_coll_id = 'projects/usgs-ssebop/tmax/{}'.format(tmax_name.lower())
    tmax_coll = ee.ImageCollection(tmax_coll_id)
    tmax_mask = ee.Image(tmax_coll.first()).select([0]).multiply(0)
    logging.debug('  Collection: {}'.format(tmax_coll_id))
    logging.debug('  Source:  {}'.format(tmax_source))
    logging.debug('  Version: {}'.format(tmax_version))

    if not os.path.isfile(tcorr_stats_path):
        logging.debug('\nBuilding new Tcorr dataframe')
        tcorr_df = pd.DataFrame(columns=[
            'IMAGE_ID', 'IMAGE_DATE', 'COLLECTION', 'TCORR', 'COUNT',
            'EXPORT_DATE'
        ])
        c1_id_set = set()
        rt_id_set = set()
    else:
        logging.debug('\nLoading existing Tcorr dataframe')
        logging.debug('  {}'.format(tcorr_stats_path))
        tcorr_df = pd.read_csv(tcorr_stats_path)
        c1_id_set = set(tcorr_df.loc[tcorr_df['COLLECTION'] == 'C1',
                                     'IMAGE_ID'])
        rt_id_set = set(tcorr_df.loc[tcorr_df['COLLECTION'] == 'RT',
                                     'IMAGE_ID'])
        logging.debug(tcorr_df.head())

    # CGM - This seems like a silly way of getting the date as a datetime
    iter_end_dt = datetime.date.today().strftime('%Y-%m-%d')
    iter_end_dt = datetime.datetime.strptime(iter_end_dt, '%Y-%m-%d')
    iter_end_dt = iter_end_dt + datetime.timedelta(days=-1)
    # iter_end_dt = datetime.datetime.today() + datetime.timedelta(days=-1)
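    # A more direct equivalent (an alternative sketch, not tested here) would be:
    #   iter_end_dt = (datetime.datetime.combine(datetime.date.today(), datetime.time.min)
    #                  - datetime.timedelta(days=1))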
    iter_start_dt = iter_end_dt + datetime.timedelta(days=-64)
    logging.debug('Start Date: {}'.format(iter_start_dt.strftime('%Y-%m-%d')))
    logging.debug('End Date:   {}\n'.format(iter_end_dt.strftime('%Y-%m-%d')))

    # Iterate over date ranges
    for iter_dt in reversed(list(utils.date_range(iter_start_dt,
                                                  iter_end_dt))):
        logging.info('Date: {}'.format(iter_dt.strftime('%Y-%m-%d')))

        # Build and merge the Real-Time Landsat collections
        l8_rt_coll = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') \
            .filterDate(iter_dt, iter_dt + datetime.timedelta(days=1)) \
            .filterBounds(tmax_mask.geometry()) \
            .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                            float(ini['INPUTS']['cloud_cover'])) \
            .filterMetadata('DATA_TYPE', 'equals', 'L1TP')
        l7_rt_coll = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA') \
            .filterDate(iter_dt, iter_dt + datetime.timedelta(days=1)) \
            .filterBounds(tmax_mask.geometry()) \
            .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                            float(ini['INPUTS']['cloud_cover'])) \
            .filterMetadata('DATA_TYPE', 'equals', 'L1TP')
        rt_coll = ee.ImageCollection(l8_rt_coll.merge(l7_rt_coll))

        # Build and merge the final Collection 1 collections
        l8_c1_coll = ee.ImageCollection('LANDSAT/LC08/C01/T1_TOA') \
            .filterDate(iter_dt, iter_dt + datetime.timedelta(days=1)) \
            .filterBounds(tmax_mask.geometry()) \
            .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                            float(ini['INPUTS']['cloud_cover'])) \
            .filterMetadata('DATA_TYPE', 'equals', 'L1TP')
        l7_c1_coll = ee.ImageCollection('LANDSAT/LE07/C01/T1_TOA') \
            .filterDate(iter_dt, iter_dt + datetime.timedelta(days=1)) \
            .filterBounds(tmax_mask.geometry()) \
            .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                            float(ini['INPUTS']['cloud_cover'])) \
            .filterMetadata('DATA_TYPE', 'equals', 'L1TP')
        c1_coll = ee.ImageCollection(l8_c1_coll.merge(l7_c1_coll))

        # Get the Image IDs that haven't been processed
        logging.info('  Getting Missing Asset IDs')
        rt_id_list = [
            id for id in rt_coll.aggregate_array('system:id').getInfo()
            if id.split('/')[-1] not in rt_id_set
        ]
        c1_id_list = [
            id for id in c1_coll.aggregate_array('system:id').getInfo()
            if id.split('/')[-1] not in c1_id_set
        ]

        if not rt_id_list and not c1_id_list:
            logging.info('  No new images, skipping date')
            continue

        logging.info('  Real-time')
        for asset_id in rt_id_list:
            logging.info('  {}'.format(asset_id))
            t_stats = ssebop.Image.from_landsat_c1_toa(ee.Image(asset_id))\
                .tcorr_stats\
                .getInfo()
            if t_stats['tcorr_value'] is None:
                t_stats['tcorr_value'] = ''
            image_id = asset_id.split('/')[-1]
            tcorr_df = tcorr_df.append(
                {
                    'IMAGE_ID':
                    image_id,
                    'IMAGE_DATE':
                    datetime.datetime.strptime(
                        image_id.split('_')[2], '%Y%m%d').strftime('%Y-%m-%d'),
                    'COLLECTION':
                    'RT',
                    'TCORR':
                    t_stats['tcorr_value'],
                    'COUNT':
                    t_stats['tcorr_count'],
                    'EXPORT_DATE':
                    datetime.datetime.today().strftime('%Y-%m-%d')
                },
                ignore_index=True)

        logging.info('  Collection 1')
        for asset_id in c1_id_list:
            logging.info('  {}'.format(asset_id))
            t_stats = ssebop.Image.from_landsat_c1_toa(ee.Image(asset_id))\
                .tcorr_stats\
                .getInfo()
            if t_stats['tcorr_value'] is None:
                t_stats['tcorr_value'] = ''
            image_id = asset_id.split('/')[-1]
            tcorr_df = tcorr_df.append(
                {
                    'IMAGE_ID':
                    asset_id.split('/')[-1],
                    'IMAGE_DATE':
                    datetime.datetime.strptime(
                        image_id.split('_')[2], '%Y%m%d').strftime('%Y-%m-%d'),
                    'COLLECTION':
                    'C1',
                    'TCORR':
                    t_stats['tcorr_value'],
                    'COUNT':
                    t_stats['tcorr_count'],
                    'EXPORT_DATE':
                    datetime.datetime.today().strftime('%Y-%m-%d')
                },
                ignore_index=True)

        # Export the current dataframe to disk
        logging.info('  Writing CSV')
        tcorr_df.sort_values(by=['IMAGE_ID', 'COLLECTION'], inplace=True)
        # tcorr_df.sort_values(by=['COLLECTION', 'IMAGE_ID'], inplace=True)
        tcorr_df.to_csv(tcorr_stats_path, index=None)
Exemple #26
0
    def build_r_0_arr(self):
        """Returns an array of the reproduction numbers (R) for each day.

        Each element in the array represents a single day in the simulation.
            For example, if self.first_date is 2020-03-01 and self.projection_end_date
            is 2020-09-01, then R_0_ARR[10] would be the R value on 2020-03-11.

        Full description at: https://covid19-projections.com/about/#effective-reproduction-number-r
            and https://covid19-projections.com/model-details/#modeling-the-r-value

        We use three different R values: R0, post-mitigation R, and reopen R.
            We use an inverse logistic/sigmoid function to smooth the transition between
            the three R values.
        """

        reopen_r = self.get_reopen_r()
        if self.use_min_reopen_equilibrium_r:
            post_reopen_r = min(reopen_r, self.post_reopen_equilibrium_r)
        else:
            post_reopen_r = self.post_reopen_equilibrium_r
        assert 0.5 <= self.LOCKDOWN_FATIGUE <= 1.5, self.LOCKDOWN_FATIGUE

        reopen_date_shift = self.REOPEN_DATE + \
            datetime.timedelta(days=int(self.REOPEN_SHIFT_DAYS) + DEFAULT_REOPEN_SHIFT_DAYS)
        fatigue_idx = self.inflection_day_idx + DAYS_UNTIL_LOCKDOWN_FATIGUE
        reopen_idx = self.get_day_idx_from_date(reopen_date_shift)
        lockdown_reopen_midpoint_idx = (self.inflection_day_idx +
                                        reopen_idx) // 2

        NUMERATOR_CONST = 6
        days_until_post_reopen = int(
            np.rint(NUMERATOR_CONST / self.REOPEN_INFLECTION))
        assert 10 <= days_until_post_reopen <= 80, days_until_post_reopen
        post_reopen_midpoint_idx = reopen_idx + days_until_post_reopen
        post_reopen_idx = reopen_idx + days_until_post_reopen * 2

        if self.country_str == 'US' or (self.country_str in EUROPEAN_COUNTRIES and \
                self.post_reopen_mode and self.post_reopen_mode < 1):
            post_reopen_days_shift = 60 if (
                self.post_reopen_mode
                and self.post_reopen_mode <= 0.95) else 45
        else:
            post_reopen_days_shift = 30
        fall_start_idx = self.get_day_idx_from_date(
            FALL_START_DATE_NORTH) - post_reopen_days_shift

        sig_lockdown = get_transition_sigmoid(self.inflection_day_idx,
                                              self.rate_of_inflection,
                                              self.INITIAL_R_0,
                                              self.LOCKDOWN_R_0)
        sig_fatigue = get_transition_sigmoid(fatigue_idx,
                                             0.2,
                                             0,
                                             self.LOCKDOWN_FATIGUE - 1,
                                             check_values=False)
        sig_reopen = get_transition_sigmoid(
            reopen_idx, self.REOPEN_INFLECTION,
            self.LOCKDOWN_R_0 * self.LOCKDOWN_FATIGUE, reopen_r)
        sig_post_reopen = get_transition_sigmoid(post_reopen_idx,
                                                 self.REOPEN_INFLECTION,
                                                 reopen_r, post_reopen_r)

        dates = utils.date_range(self.first_date, self.projection_end_date)
        assert len(dates) == self.N

        R_0_ARR = [self.INITIAL_R_0]
        for day_idx in range(1, self.N):
            if day_idx < lockdown_reopen_midpoint_idx:
                r_t = sig_lockdown(day_idx)
                if abs(self.LOCKDOWN_FATIGUE - 1) > 1e-9:
                    r_t *= 1 + sig_fatigue(day_idx)
            elif day_idx > post_reopen_midpoint_idx:
                r_t = sig_post_reopen(day_idx)
            else:
                r_t = sig_reopen(day_idx)

            if day_idx > fall_start_idx:
                fall_r_mult = max(
                    0.9,
                    min(1.35,
                        self.fall_r_multiplier**(day_idx - fall_start_idx)))
                assert 0.9 <= fall_r_mult <= 1.5, fall_r_mult
                r_t *= fall_r_mult

            # Make sure R is stable
            if day_idx > reopen_idx and abs(r_t / R_0_ARR[-1] - 1) > 0.2:
                assert False, \
                    f'{str(self)} - R changed too quickly: {day_idx} {R_0_ARR[-1]} -> {r_t} {R_0_ARR}'

            R_0_ARR.append(r_t)

        assert len(R_0_ARR) == self.N
        self.reopen_idx = reopen_idx

        return R_0_ARR
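
# The build_r_0_arr method above relies on a get_transition_sigmoid helper defined
# elsewhere in this codebase.  The sketch below is a minimal illustration of what
# such a helper might look like; the signature and the logistic form are assumptions
# for illustration only, not the project's actual implementation.
def _transition_sigmoid_sketch(inflection_idx, rate_of_inflection, low_value, high_value):
    """Return f(day_idx) that moves smoothly from low_value to high_value,
    crossing the halfway point at inflection_idx."""
    import numpy as np

    def _sigmoid(day_idx):
        frac = 1.0 / (1.0 + np.exp(-rate_of_inflection * (day_idx - inflection_idx)))
        return low_value + (high_value - low_value) * frac
    return _sigmoid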
def main(ini_path=None, overwrite_flag=False, delay_time=0, gee_key_file=None,
         max_ready=-1, cron_flag=False, reverse_flag=False):
    """Compute daily Tcorr images

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, overwrite existing files if the export dates are the same and
        generate new images (but with different export dates) even if the tile
        lists are the same.  The default is False.
    delay_time : float, optional
        Delay time in seconds between starting export tasks (or checking the
        number of queued tasks, see "max_ready" parameter).  The default is 0.
    gee_key_file : str, None, optional
        Earth Engine service account JSON key file (the default is None).
    max_ready: int, optional
        Maximum number of queued "READY" tasks.  The default is -1, which
        implies no limit to the number of tasks that will be submitted.
    cron_flag : bool, optional
        If True, only compute the Tcorr daily image if the existing image does
        not include all available images (based on the 'wrs2_tiles' property),
        and limit the date range to the last 64 days (~2 months).
    reverse_flag : bool, optional
        If True, process dates in reverse order.
    """
    logging.info('\nCompute daily Tcorr images')

    ini = utils.read_ini(ini_path)

    model_name = 'SSEBOP'
    # model_name = ini['INPUTS']['et_model'].upper()

    tmax_name = ini[model_name]['tmax_source']

    export_id_fmt = 'tcorr_image_{product}_{date}_{export}'
    asset_id_fmt = '{coll_id}/{date}_{export}'

    tcorr_daily_coll_id = '{}/{}_daily'.format(
        ini['EXPORT']['export_coll'], tmax_name.lower())

    if (tmax_name.upper() == 'CIMIS' and
            ini['INPUTS']['end_date'] < '2003-10-01'):
        logging.error(
            '\nCIMIS is not currently available before 2003-10-01, exiting\n')
        sys.exit()
    elif (tmax_name.upper() == 'DAYMET' and
            ini['INPUTS']['end_date'] > '2018-12-31'):
        logging.warning(
            '\nDAYMET is not currently available past 2018-12-31, '
            'using median Tmax values\n')
        # sys.exit()
    # elif (tmax_name.upper() == 'TOPOWX' and
    #         ini['INPUTS']['end_date'] > '2017-12-31'):
    #     logging.warning(
    #         '\nDAYMET is not currently available past 2017-12-31, '
    #         'using median Tmax values\n')
    #     # sys.exit()

    # Extract the model keyword arguments from the INI
    # Set the property name to lower case and try to cast values to numbers
    model_args = {
        k.lower(): float(v) if utils.is_number(v) else v
        for k, v in dict(ini[model_name]).items()}
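    # For example (hypothetical INI values), a [SSEBOP] section containing
    #   tmax_source = DAYMET_MEDIAN_V2
    #   dt_value = 15
    # would yield model_args == {'tmax_source': 'DAYMET_MEDIAN_V2', 'dt_value': 15.0}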
    # et_reference_args = {
    #     k: model_args.pop(k)
    #     for k in [k for k in model_args.keys() if k.startswith('et_reference_')]}


    logging.info('\nInitializing Earth Engine')
    if gee_key_file:
        logging.info('  Using service account key file: {}'.format(gee_key_file))
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('x', key_file=gee_key_file),
                      use_cloud_api=True)
    else:
        ee.Initialize(use_cloud_api=True)

    # Get a Tmax image to set the Tcorr values to
    logging.debug('\nTmax properties')
    tmax_source = tmax_name.split('_', 1)[0]
    tmax_version = tmax_name.split('_', 1)[1]
    if 'MEDIAN' in tmax_name.upper():
        tmax_coll_id = 'projects/earthengine-legacy/assets/' \
                       'projects/usgs-ssebop/tmax/{}'.format(tmax_name.lower())
        tmax_coll = ee.ImageCollection(tmax_coll_id)
        tmax_mask = ee.Image(tmax_coll.first()).select([0]).multiply(0)
    else:
        # TODO: Add support for non-median tmax sources
        raise ValueError('unsupported tmax_source: {}'.format(tmax_name))
    logging.debug('  Collection: {}'.format(tmax_coll_id))
    logging.debug('  Source:  {}'.format(tmax_source))
    logging.debug('  Version: {}'.format(tmax_version))

    logging.debug('\nExport properties')
    export_info = utils.get_info(ee.Image(tmax_mask))
    if 'daymet' in tmax_name.lower():
        # Custom smaller extent for DAYMET focused on CONUS
        export_extent = [-1999750, -1890500, 2500250, 1109500]
        export_shape = [4500, 3000]
        export_geo = [1000, 0, -1999750, 0, -1000, 1109500]
        # Custom medium extent for DAYMET of CONUS, Mexico, and southern Canada
        # export_extent = [-2099750, -3090500, 2900250, 1909500]
        # export_shape = [5000, 5000]
        # export_geo = [1000, 0, -2099750, 0, -1000, 1909500]
        export_crs = export_info['bands'][0]['crs']
    else:
        export_crs = export_info['bands'][0]['crs']
        export_geo = export_info['bands'][0]['crs_transform']
        export_shape = export_info['bands'][0]['dimensions']
        # export_geo = ee.Image(tmax_mask).projection().getInfo()['transform']
        # export_crs = ee.Image(tmax_mask).projection().getInfo()['crs']
        # export_shape = ee.Image(tmax_mask).getInfo()['bands'][0]['dimensions']
        export_extent = [
            export_geo[2], export_geo[5] + export_shape[1] * export_geo[4],
            export_geo[2] + export_shape[0] * export_geo[0], export_geo[5]]
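        # Worked example: a 1000 m grid like the DAYMET one above, with
        #   geo = [1000, 0, -1999750, 0, -1000, 1109500] and shape = [4500, 3000],
        #   gives extent = [-1999750, -1890500, 2500250, 1109500] (xmin, ymin, xmax, ymax)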
    logging.debug('  CRS: {}'.format(export_crs))
    logging.debug('  Extent: {}'.format(export_extent))
    logging.debug('  Geo: {}'.format(export_geo))
    logging.debug('  Shape: {}'.format(export_shape))


    # This extent will limit the WRS2 tiles that are included
    # This is needed especially for non-median DAYMET Tmax since the default
    #   extent is huge but we are only processing a subset
    if 'daymet' in tmax_name.lower():
        export_geom = ee.Geometry.Rectangle(
            [-125, 25, -65, 53], proj='EPSG:4326', geodesic=False)
        # export_geom = ee.Geometry.Rectangle(
        #     [-135, 15, -55, 60], proj='EPSG:4326', geodesic=False)
    elif 'cimis' in tmax_name.lower():
        export_geom = ee.Geometry.Rectangle(
            [-124, 35, -119, 42], proj='EPSG:4326', geodesic=False)
    else:
        export_geom = tmax_mask.geometry()


    # If cell_size parameter is set in the INI,
    # adjust the output cellsize and recompute the transform and shape
    try:
        export_cs = float(ini['EXPORT']['cell_size'])
        export_shape = [
            int(math.ceil(abs((export_shape[0] * export_geo[0]) / export_cs))),
            int(math.ceil(abs((export_shape[1] * export_geo[4]) / export_cs)))]
        export_geo = [export_cs, 0.0, export_geo[2], 0.0, -export_cs, export_geo[5]]
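        # e.g. resampling a 1000 m grid with shape [4500, 3000] to cell_size = 5000
        #   gives shape [900, 600] and geo [5000, 0.0, -1999750, 0.0, -5000, 1109500]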
        logging.debug('  Custom export cell size: {}'.format(export_cs))
        logging.debug('  Geo: {}'.format(export_geo))
        logging.debug('  Shape: {}'.format(export_shape))
    except KeyError:
        pass

    if not ee.data.getInfo(tcorr_daily_coll_id):
        logging.info('\nExport collection does not exist and will be built'
                     '\n  {}'.format(tcorr_daily_coll_id))
        input('Press ENTER to continue')
        ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, tcorr_daily_coll_id)

    # Get current asset list
    logging.debug('\nGetting GEE asset list')
    asset_list = utils.get_ee_assets(tcorr_daily_coll_id)
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        pprint.pprint(asset_list[:10])

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        logging.debug('  Tasks: {}\n'.format(len(tasks)))
        input('ENTER')


    collections = [x.strip() for x in ini['INPUTS']['collections'].split(',')]

    # Limit by year and month
    try:
        month_list = sorted(list(utils.parse_int_set(ini['TCORR']['months'])))
    except:
        logging.info('\nTCORR "months" parameter not set in the INI,'
                     '\n  Defaulting to all months (1-12)\n')
        month_list = list(range(1, 13))
    try:
        year_list = sorted(list(utils.parse_int_set(ini['TCORR']['years'])))
    except:
        logging.info('\nTCORR "years" parameter not set in the INI,'
                     '\n  Defaulting to all available years\n')
        year_list = []

    # Key is cycle day, value is a reference date on that cycle
    # Data from: https://landsat.usgs.gov/landsat_acq
    # Only 8 cycle days are needed because the Landsat 5/7 and 7/8 overpasses
    #   are offset from each other
    cycle_dates = {
        7: '1970-01-01',
        8: '1970-01-02',
        1: '1970-01-03',
        2: '1970-01-04',
        3: '1970-01-05',
        4: '1970-01-06',
        5: '1970-01-07',
        6: '1970-01-08',
    }
    # cycle_dates = {
    #     1:  '2000-01-06',
    #     2:  '2000-01-07',
    #     3:  '2000-01-08',
    #     4:  '2000-01-09',
    #     5:  '2000-01-10',
    #     6:  '2000-01-11',
    #     7:  '2000-01-12',
    #     8:  '2000-01-13',
    #     # 9:  '2000-01-14',
    #     # 10: '2000-01-15',
    #     # 11: '2000-01-16',
    #     # 12: '2000-01-01',
    #     # 13: '2000-01-02',
    #     # 14: '2000-01-03',
    #     # 15: '2000-01-04',
    #     # 16: '2000-01-05',
    # }
    cycle_base_dt = datetime.datetime.strptime(cycle_dates[1], '%Y-%m-%d')
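    # e.g. for an export date of 1970-01-05: ((1970-01-05 - 1970-01-03).days % 8) + 1 = 3,
    #   which matches cycle_dates[3] above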

    if cron_flag:
        # CGM - This seems like a silly way of getting the date as a datetime
        #   Why am I doing this and not using the commented out line?
        iter_end_dt = datetime.date.today().strftime('%Y-%m-%d')
        iter_end_dt = datetime.datetime.strptime(iter_end_dt, '%Y-%m-%d')
        iter_end_dt = iter_end_dt + datetime.timedelta(days=-4)
        # iter_end_dt = datetime.datetime.today() + datetime.timedelta(days=-1)
        iter_start_dt = iter_end_dt + datetime.timedelta(days=-64)
    else:
        iter_start_dt = datetime.datetime.strptime(
            ini['INPUTS']['start_date'], '%Y-%m-%d')
        iter_end_dt = datetime.datetime.strptime(
            ini['INPUTS']['end_date'], '%Y-%m-%d')
    logging.debug('Start Date: {}'.format(iter_start_dt.strftime('%Y-%m-%d')))
    logging.debug('End Date:   {}\n'.format(iter_end_dt.strftime('%Y-%m-%d')))


    for export_dt in sorted(utils.date_range(iter_start_dt, iter_end_dt),
                            reverse=reverse_flag):
        export_date = export_dt.strftime('%Y-%m-%d')
        next_date = (export_dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        if month_list and export_dt.month not in month_list:
            logging.debug(f'Date: {export_date} - month not in INI - skipping')
            continue
        elif year_list and export_dt.year not in year_list:
            logging.debug(f'Date: {export_date} - year not in INI - skipping')
            continue
        elif export_date >= datetime.datetime.today().strftime('%Y-%m-%d'):
            logging.debug(f'Date: {export_date} - unsupported date - skipping')
            continue
        elif export_date < '1984-03-23':
            logging.debug(f'Date: {export_date} - no Landsat 5+ images before '
                          '1984-03-23 - skipping')
            continue
        logging.info(f'Date: {export_date}')

        export_id = export_id_fmt.format(
            product=tmax_name.lower(),
            date=export_dt.strftime('%Y%m%d'),
            export=datetime.datetime.today().strftime('%Y%m%d'))
        logging.debug('  Export ID: {}'.format(export_id))

        asset_id = asset_id_fmt.format(
            coll_id=tcorr_daily_coll_id,
            date=export_dt.strftime('%Y%m%d'),
            export=datetime.datetime.today().strftime('%Y%m%d'))
        logging.debug('  Asset ID: {}'.format(asset_id))

        if overwrite_flag:
            if export_id in tasks.keys():
                logging.debug('  Task already submitted, cancelling')
                ee.data.cancelTask(tasks[export_id]['id'])
            # This is intentionally not an "elif" so that a task can be
            # cancelled and an existing image/file/asset can be removed
            if asset_id in asset_list:
                logging.debug('  Asset already exists, removing')
                ee.data.deleteAsset(asset_id)
        else:
            if export_id in tasks.keys():
                logging.debug('  Task already submitted, exiting')
                continue
            elif asset_id in asset_list:
                logging.debug('  Asset already exists, skipping')
                continue

        # Build and merge the Landsat collections
        model_obj = ssebop.Collection(
            collections=collections,
            start_date=export_dt.strftime('%Y-%m-%d'),
            end_date=(export_dt + datetime.timedelta(days=1)).strftime(
                '%Y-%m-%d'),
            cloud_cover_max=float(ini['INPUTS']['cloud_cover']),
            geometry=export_geom,
            model_args=model_args,
            # filter_args=filter_args,
        )
        landsat_coll = model_obj.overpass(variables=['ndvi'])
        # wrs2_tiles_all = model_obj.get_image_ids()
        # pprint.pprint(landsat_coll.aggregate_array('system:id').getInfo())
        # input('ENTER')

        logging.debug('  Getting available WRS2 tile list')
        landsat_id_list = utils.get_info(landsat_coll.aggregate_array('system:id'))
        if not landsat_id_list:
            logging.info('  No available images - skipping')
            continue
        wrs2_tiles_all = set([id.split('_')[-2] for id in landsat_id_list])
        # print(wrs2_tiles_all)
        # print('\n')

        def tile_set_2_str(tiles):
            """Trying to build a more compact version of the WRS2 tile list"""
            tile_dict = defaultdict(list)
            for tile in tiles:
                tile_dict[int(tile[:3])].append(int(tile[3:]))
            tile_dict = {k: sorted(v) for k, v in tile_dict.items()}
            tile_str = json.dumps(tile_dict, sort_keys=True) \
                .replace('"', '').replace(' ', '')\
                .replace('{', '').replace('}', '')
            return tile_str
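        # e.g. tile_set_2_str({'043033', '043034', '044033'}) -> '43:[33,34],44:[33]'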
        wrs2_tiles_all_str = tile_set_2_str(wrs2_tiles_all)
        # pprint.pprint(wrs2_tiles_all_str)
        # print('\n')

        def tile_str_2_set(tile_str):
            # tile_dict = eval(tile_str)

            tile_set = set()
            for t in tile_str.replace('[', '').split('],'):
                path = int(t.split(':')[0])
                for row in t.split(':')[1].replace(']', '').split(','):
                    tile_set.add('{:03d}{:03d}'.format(path, int(row)))
            return tile_set
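        # e.g. tile_str_2_set('43:[33,34],44:[33]') -> {'043033', '043034', '044033'},
        #   i.e. the inverse of tile_set_2_str above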
        # wrs2_tiles_all_dict = tile_str_2_set(wrs2_tiles_all_str)
        # pprint.pprint(wrs2_tiles_all_dict)


        # If overwriting, start a new export no matter what
        # The default is not to overwrite, so this mode will not be used often
        if not overwrite_flag:
            # Check if there are any previous images for this date
            # If so, only build a new Tcorr image if there are new wrs2_tiles
            #   that were not used in the previous image.
            # Should this code only be run in cron mode or is this the expected
            #   operation when (re)running for any date range?
            # Should we only test the last image
            # or all previous images for the date?
            logging.debug('  Checking for previous exports/versions of daily image')
            tcorr_daily_coll = ee.ImageCollection(tcorr_daily_coll_id)\
                .filterDate(export_date, next_date)\
                .limit(1, 'date_ingested', False)
            tcorr_daily_info = utils.get_info(tcorr_daily_coll)
            # pprint.pprint(tcorr_daily_info)
            # input('ENTER')

            if tcorr_daily_info['features']:
                # Assume we won't be building a new image and only set flag
                #   to True if the WRS2 tile lists are different
                export_flag = False

                # The ".limit(1, ..." on the tcorr_daily_coll above makes this
                # for loop and break statement unnecessary, but leaving for now
                for tcorr_img in tcorr_daily_info['features']:
                    # If the full WRS2 list is not present, rebuild the image
                    # This should only happen for much older Tcorr images
                    if 'wrs2_available' not in tcorr_img['properties'].keys():
                        logging.debug(
                            '    "wrs2_available" property not present in '
                            'previous export')
                        export_flag = True
                        break

                    # DEADBEEF - The wrs2_available property is now a string
                    # wrs2_tiles_old = set(tcorr_img['properties']['wrs2_available'].split(','))

                    # Convert available dict str to a list of path/rows
                    wrs2_tiles_old_str = tcorr_img['properties']['wrs2_available']
                    wrs2_tiles_old = tile_str_2_set(wrs2_tiles_old_str)

                    if wrs2_tiles_all != wrs2_tiles_old:
                        logging.debug('  Tile Lists')
                        logging.debug('  Previous: {}'.format(', '.join(
                            sorted(wrs2_tiles_old))))
                        logging.debug('  Available: {}'.format(', '.join(
                            sorted(wrs2_tiles_all))))
                        logging.debug('  New: {}'.format(', '.join(
                            sorted(wrs2_tiles_all.difference(wrs2_tiles_old)))))
                        logging.debug('  Dropped: {}'.format(', '.join(
                            sorted(wrs2_tiles_old.difference(wrs2_tiles_all)))))

                        export_flag = True
                        break

                if not export_flag:
                    logging.debug('  No new WRS2 tiles/images - skipping')
                    continue
                # else:
                #     logging.debug('    Building new version')
            else:
                logging.debug('    No previous exports')

        def tcorr_img_func(image):
            t_obj = ssebop.Image.from_landsat_c1_toa(
                ee.Image(image), **model_args)
            t_stats = ee.Dictionary(t_obj.tcorr_stats) \
                .combine({'tcorr_p5': 0, 'tcorr_count': 0}, overwrite=False)
            tcorr = ee.Number(t_stats.get('tcorr_p5'))
            count = ee.Number(t_stats.get('tcorr_count'))

            # Remove the merged collection indices from the system:index
            scene_id = ee.List(
                ee.String(image.get('system:index')).split('_')).slice(-3)
            scene_id = ee.String(scene_id.get(0)).cat('_') \
                .cat(ee.String(scene_id.get(1))).cat('_') \
                .cat(ee.String(scene_id.get(2)))

            return tmax_mask.add(tcorr) \
                .rename(['tcorr']) \
                .clip(image.geometry()) \
                .set({
                    'system:time_start': image.get('system:time_start'),
                    'scene_id': scene_id,
                    'wrs2_path': ee.Number.parse(scene_id.slice(5, 8)),
                    'wrs2_row': ee.Number.parse(scene_id.slice(8, 11)),
                    'wrs2_tile': scene_id.slice(5, 11),
                    'spacecraft_id': image.get('SPACECRAFT_ID'),
                    'tcorr': tcorr,
                    'count': count,
                })
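        # e.g. a merged-collection system:index such as '1_2_LC08_044033_20170716'
        #   yields scene_id 'LC08_044033_20170716', wrs2_path 44, wrs2_row 33,
        #   and wrs2_tile '044033' (the image ID here is illustrative only)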
        # Test for one image
        # pprint.pprint(tcorr_img_func(ee.Image(landsat_coll \
        #     .filterMetadata('WRS_PATH', 'equals', 36) \
        #     .filterMetadata('WRS_ROW', 'equals', 33).first())).getInfo())
        # input('ENTER')

        # (Re)build the Landsat collection from the image IDs
        landsat_coll = ee.ImageCollection(landsat_id_list)
        tcorr_img_coll = ee.ImageCollection(landsat_coll.map(tcorr_img_func)) \
            .filterMetadata('count', 'not_less_than',
                            float(ini['TCORR']['min_pixel_count']))

        # If there are no Tcorr values, return an empty image
        tcorr_img = ee.Algorithms.If(
            tcorr_img_coll.size().gt(0),
            tcorr_img_coll.median(),
            tmax_mask.updateMask(0))


        # Build the tile list as a string of a dictionary of paths and rows
        def tile_dict(path):
            # Get the row list for each path
            rows = tcorr_img_coll\
                .filterMetadata('wrs2_path', 'equals', path)\
                .aggregate_array('wrs2_row')
            # Convert rows to integers (otherwise they come back as floats)
            rows = ee.List(rows).sort().map(lambda row: ee.Number(row).int())
            return ee.Number(path).format('%d').cat(':[')\
                .cat(ee.List(rows).join(',')).cat(']')

        path_list = ee.List(tcorr_img_coll.aggregate_array('wrs2_path'))\
            .distinct().sort()
        wrs2_tile_str = ee.List(path_list.map(tile_dict)).join(',')
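        # Produces the same compact format as tile_set_2_str above
        #   (e.g. '43:[33,34],44:[33]'), but built server-side without a getInfo call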
        # pprint.pprint(wrs2_tile_str.getInfo())
        # input('ENTER')

        # # DEADBEEF - This works but is really slow because of the getInfo
        # logging.debug('  Getting Tcorr collection tile list')
        # wrs2_tile_list = utils.get_info(
        #     tcorr_img_coll.aggregate_array('wrs2_tile'))
        # wrs2_tile_str = tile_set_2_str(wrs2_tile_list)
        # pprint.pprint(wrs2_tile_list)
        # pprint.pprint(wrs2_tile_str)
        # input('ENTER')

        # DEADBEEF - Old approach, tile lists for big areas are too long
        # def unique_properties(coll, property):
        #     return ee.String(ee.List(ee.Dictionary(
        #         coll.aggregate_histogram(property)).keys()).join(','))
        # wrs2_tile_list = ee.String('').cat(unique_properties(
        #     tcorr_img_coll, 'wrs2_tile'))
        # wrs2_tile_list = set([id.split('_')[-2] for id in wrs2_tile_list])


        def unique_properties(coll, property):
            return ee.String(ee.List(ee.Dictionary(
                coll.aggregate_histogram(property)).keys()).join(','))
        landsat_list = ee.String('').cat(unique_properties(
            tcorr_img_coll, 'spacecraft_id'))


        # Cast to float and set properties
        tcorr_img = ee.Image(tcorr_img).rename(['tcorr']).double() \
            .set({
                'system:time_start': utils.millis(export_dt),
                'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'),
                'date': export_dt.strftime('%Y-%m-%d'),
                'year': int(export_dt.year),
                'month': int(export_dt.month),
                'day': int(export_dt.day),
                'doy': int(export_dt.strftime('%j')),
                'cycle_day': ((export_dt - cycle_base_dt).days % 8) + 1,
                'landsat': landsat_list,
                'model_name': model_name,
                'model_version': ssebop.__version__,
                'tmax_source': tmax_source.upper(),
                'tmax_version': tmax_version.upper(),
                'wrs2_tiles': wrs2_tile_str,
                'wrs2_available': wrs2_tiles_all_str,
            })
        # pprint.pprint(tcorr_img.getInfo()['properties'])
        # input('ENTER')

        logging.debug('  Building export task')
        task = ee.batch.Export.image.toAsset(
            image=ee.Image(tcorr_img),
            description=export_id,
            assetId=asset_id,
            crs=export_crs,
            crsTransform='[' + ','.join(list(map(str, export_geo))) + ']',
            dimensions='{0}x{1}'.format(*export_shape),
        )

        logging.info('  Starting export task')
        utils.ee_task_start(task)

        # Pause before starting the next export task
        utils.delay_task(delay_time, max_ready)
        logging.debug('')
Exemple #28
0
    global_last_date = str(max(last_contrib_date, last_review_date))
    msg = []
    msg.append('Global first date is: %s' % global_first_date)
    msg.append('Global last date is: %s' % global_last_date)
    unique_reviewer_set = set()

    actives_windows = [
        # (days, (rolling_avg_span, ...))
        (30, (180, 365)),
        (7, (30, 180)),
    ]
    actives = {x: [] for (x, _) in actives_windows}
    rolling_sets = {x: RollingSet(x) for (x, _) in actives_windows}
    actives_avg = {x: defaultdict(list) for (x, _) in actives_windows}

    for date in date_range(global_first_date, global_last_date):
        contribs = contribs_by_date.get(date, set())
        reviews = reviewers_by_date.get(date, set())
        mapped_contribs = set()
        for person in contribs:
            name, email = person.split('<', 1)
            email = '<' + email
            p = '%s %s' % (map_one_person(person), email)
            if p.lower() in excluded_authors:
                continue
            mapped_contribs.add(name)
        mapped_reviews = set()
        for person in reviews:
            name, email = person.split('<', 1)
            email = '<' + email
            p = '%s %s' % (map_one_person(person), email)
Exemple #29
0
def draw_active_contribs_trends2(actives_windows, actives, actives_avg,
                                 start_date, end_date):
    # TODO: shade between vertical lines to delineate feature work (e.g. sp or ec or crypto)
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    pyplot.style.use("fivethirtyeight")
    matplotlib.rcParams["font.sans-serif"] = "B612"
    matplotlib.rcParams["font.family"] = "B612"
    matplotlib.rcParams["axes.labelsize"] = 10
    matplotlib.rcParams["xtick.labelsize"] = 8
    matplotlib.rcParams["ytick.labelsize"] = 8
    matplotlib.rcParams["text.color"] = "k"

    prop_cycle = pyplot.rcParams["axes.prop_cycle"]
    all_colors = itertools.cycle(prop_cycle.by_key()["color"])

    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    len_all_dates = len(all_dates)
    max_yval = 0
    for aw, rolling_avg_windows in actives_windows:
        for r_a_w in rolling_avg_windows:
            pyplot.plot(x_vals,
                        actives_avg[aw][r_a_w][-len_all_dates:],
                        '-',
                        label="%d day avg of %d day total" % (r_a_w, aw),
                        linewidth=3,
                        color=next(all_colors))
            max_yval = max(max_yval, *actives_avg[aw][r_a_w][-len_all_dates:])
    x_tick_locs = []
    x_tick_vals = []
    for i, d in enumerate(all_dates):
        # if d in RELEASE_DATES:
        #     pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        # if not i % 60:
        y, m, day = d.split('-')
        if m in ('01', '04', '07', '10') and day == '01':
            x_tick_locs.append(i)
            x_tick_vals.append(d)
    # Only add a final tick/label pair if the last quarterly tick is more than
    # 30 days back; appending to only one list would leave xticks() with
    # mismatched locations and labels
    if not x_tick_locs or len(all_dates) - x_tick_locs[-1] > 30:
        x_tick_locs.append(len(all_dates))
        x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs,
                  x_tick_vals,
                  rotation=30,
                  horizontalalignment='right')

    for start, end, name in FEATURES:
        start_index = all_dates.index(start)
        if end is None:
            end_index = len(all_dates)
        else:
            end_index = all_dates.index(end)
        pyplot.axvspan(start_index,
                       end_index,
                       label=name,
                       alpha=0.3,
                       color=next(all_colors))

    pyplot.title('Active contributors (as of %s)' %
                 datetime.datetime.now().date())
    pyplot.ylabel('Contributor Count')
    pyplot.legend(loc='best')
    pyplot.grid(b=True, which='both', axis='both')
    pyplot.xlim(-1, len(all_dates) + 1)
    pyplot.ylim(0, max_yval + 5)
    ax = pyplot.gca()
    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)
    fig.set_frameon(False)
    fig.savefig('active_contribs.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()
Exemple #30
0
def draw_total_contributors_graph(people_by_date, start_date, end_date):
    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    total_yvals = []
    reviewers_yvals = []
    authors_yvals = []
    total_set_of_contributors = set()
    total_set_of_reviewers = set()
    total_set_of_authors = set()
    for date in date_range(start_date, end_date):
        todays_total = set()
        todays_reviewers = people_by_date[date]['reviews']
        todays_authors = people_by_date[date]['contribs']
        todays_total.update(todays_reviewers)
        todays_total.update(todays_authors)
        total_set_of_contributors.update(todays_total)
        total_set_of_reviewers.update(todays_reviewers)
        total_set_of_authors.update(todays_authors)
        total_yvals.append(len(total_set_of_contributors))
        reviewers_yvals.append(len(total_set_of_reviewers))
        authors_yvals.append(len(total_set_of_authors))

    lens = map(len, [total_yvals, reviewers_yvals, authors_yvals])
    assert len(set(lens)) == 1, lens

    pyplot.plot(x_vals, total_yvals, '-', color='red',
               label="Total contributors", drawstyle="steps", linewidth=3)
    pyplot.plot(x_vals, reviewers_yvals, '-', color='green',
               label="Total reviewers", drawstyle="steps", linewidth=3)
    pyplot.plot(x_vals, authors_yvals, '-', color='blue',
               label="Total authors", drawstyle="steps", linewidth=3)
    pyplot.title('Total contributors (as of %s)' % datetime.datetime.now().date())
    pyplot.ylabel('Contributors')
    pyplot.legend(loc='upper left')
    x_tick_locs = []
    x_tick_vals = []
    for i, d in enumerate(all_dates):
        if d in RELEASE_DATES:
            pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
    # Only add a final tick/label pair if the last tick is more than 30 days
    # back, keeping x_tick_locs and x_tick_vals the same length
    if not x_tick_locs or len(all_dates) - x_tick_locs[-1] > 30:
        x_tick_locs.append(len(all_dates))
        x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs, x_tick_vals, rotation=30, horizontalalignment='right')
    pyplot.xlim(-1, len(all_dates) + 1)
    pyplot.grid(b=True, which='both', axis='both')
    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)
    fig.savefig('total_contribs.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # small version
    window = 90
    pyplot.plot(x_vals[:window], total_yvals[-window:], '-', color='red',
               label="Total contributors", drawstyle="steps", linewidth=3)
    pyplot.grid(b=False, which='both', axis='both')
    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlim(-1, window + 1)
    ax = pyplot.gca()
    ax.set_frame_on(True)
    ax.set_facecolor('black')  # change to (24, 24, 24)
    fig = pyplot.gcf()
    fig.set_size_inches(2, 2. / 3)
    fig.savefig('total_contribs_small.png', bbox_inches='tight', pad_inches=0)
    pyplot.close()
Exemple #31
0
def draw_total_contributors_graph(people_by_date, start_date, end_date):
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)

    prop_cycle = pyplot.rcParams["axes.prop_cycle"]
    all_colors = itertools.cycle(prop_cycle.by_key()["color"])

    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    total_yvals = []
    reviewers_yvals = []
    authors_yvals = []
    total_set_of_contributors = set()
    total_set_of_reviewers = set()
    total_set_of_authors = set()
    for date in date_range(start_date, end_date):
        todays_total = set()
        todays_reviewers = people_by_date[date]['reviews']
        todays_authors = people_by_date[date]['contribs']
        todays_total.update(todays_reviewers)
        todays_total.update(todays_authors)
        total_set_of_contributors.update(todays_total)
        total_set_of_reviewers.update(todays_reviewers)
        total_set_of_authors.update(todays_authors)
        total_yvals.append(len(total_set_of_contributors))
        reviewers_yvals.append(len(total_set_of_reviewers))
        authors_yvals.append(len(total_set_of_authors))

    lens = map(len, [total_yvals, reviewers_yvals, authors_yvals])
    assert len(set(lens)) == 1, lens

    pyplot.plot(x_vals,
                total_yvals,
                '-',
                color=next(all_colors),
                label="Total contributors",
                drawstyle="steps",
                linewidth=3)
    pyplot.plot(x_vals,
                reviewers_yvals,
                '-',
                color=next(all_colors),
                label="Total reviewers",
                drawstyle="steps",
                linewidth=1.5)
    pyplot.plot(x_vals,
                authors_yvals,
                '-',
                color=next(all_colors),
                label="Total authors",
                drawstyle="steps",
                linewidth=1.5)
    pyplot.title('Total contributors (as of %s)' %
                 datetime.datetime.now().date())
    pyplot.ylabel('Contributors')
    pyplot.legend(loc='upper left')
    x_tick_locs = []
    x_tick_vals = []
    for i, d in enumerate(all_dates):
        # if d in RELEASE_DATES:
        #     pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
    # Only add a final tick/label pair if the last tick is more than 30 days
    # back, keeping x_tick_locs and x_tick_vals the same length
    if not x_tick_locs or len(all_dates) - x_tick_locs[-1] > 30:
        x_tick_locs.append(len(all_dates))
        x_tick_vals.append(all_dates[-1])
    pyplot.xticks(x_tick_locs,
                  x_tick_vals,
                  rotation=30,
                  horizontalalignment='right')
    pyplot.xlim(-1, len(all_dates) + 1)
    pyplot.grid(b=True, which='both', axis='both')

    pyplot.style.use("fivethirtyeight")
    matplotlib.rcParams["font.sans-serif"] = "B612"
    matplotlib.rcParams["font.family"] = "B612"
    matplotlib.rcParams["axes.labelsize"] = 10
    matplotlib.rcParams["xtick.labelsize"] = 8
    matplotlib.rcParams["ytick.labelsize"] = 8
    matplotlib.rcParams["text.color"] = "k"

    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)
    fig.savefig('total_contribs.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # small version
    window = 90
    pyplot.plot(x_vals[:window],
                total_yvals[-window:],
                '-',
                color='red',
                label="Total contributors",
                drawstyle="steps",
                linewidth=3)
    pyplot.grid(b=False, which='both', axis='both')
    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlim(-1, window + 1)
    ax = pyplot.gca()
    ax.set_frame_on(True)
    ax.set_facecolor('black')  # change to (24, 24, 24)
    fig = pyplot.gcf()
    fig.set_size_inches(2, 2. / 3)
    fig.savefig('total_contribs_small.png', bbox_inches='tight', pad_inches=0)
    pyplot.close()
# ** CONFIGURATION

alpha = 0.01
confidence = 1 - alpha

# single fit
currency = 'USD'
category =  'design'  # 'art'  #
kpi = 'avg'  # 'money' # 

granularity_in_days = YEAR_DAYS  # n_months_days(6) #

# ** END CONFIGURATION


date_borders = date_range(min_date, max_date, delta_days=granularity_in_days)
date_borders.append(max_date)
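# date_borders now holds one border every granularity_in_days (e.g. every 365 days
# when granularity_in_days == YEAR_DAYS) starting at min_date, with max_date
# appended as the final border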

# extract and clean data
current_timeseries = timseries_data[
    (timseries_data['currency'] == currency)
    & (timseries_data['category'] == category)
    # &  (timseries_data['departments_ids'].str.contains('29'))  # to view a single department

    # &  (timseries_data['departments_ids'].str.contains('29') | timseries_data['departments_ids'].str.contains('74') | timseries_data['departments_ids'].str.contains('111'))  # to view multiple departments
    ]
current_timeseries.dropna(inplace=True)

# create aggregated timeseries
dates = [date_to_str(d) for d in date_borders[:-1]]
ys = []
Exemple #33
0
        (contribs_by_date, authors_by_count) = json.load(f)
    return contribs_by_date, authors_by_count


def save_commits(data):
    with open(COMMITS_FILENAME, 'wb') as f:
        json.dump(data, f)

if __name__ == '__main__':
    people_by_date = collections.defaultdict(list)
    dates_by_person = collections.defaultdict(list)
    for line in sys.stdin.readlines():
        if not line.strip():
            continue
        name, email, timestamp = line.strip().split('|')
        person = ('%s %s' % (name, email)).decode('utf8')
        person = '%s %s' % (map_one_person(person), email)
        if person.lower() in excluded_authors:
            continue
        ts = dateutil.parser.parse(timestamp).strftime('%Y-%m-%d')
        people_by_date[ts].append(person)
        dates_by_person[person].append(ts)

    # fill in any missing days
    first_date = min(people_by_date.keys())
    for day in date_range(first_date, datetime.datetime.now()):
        if day not in people_by_date:
            people_by_date[day] = []

    save_commits((people_by_date, dates_by_person))
Exemple #34
0
    def build_r_0_arr(self):
        """Returns an array of the reproduction numbers (R) for each day.

        Each element in the array represents a single day in the simulation.
            For example, if self.first_date is 2020-03-01 and self.projection_end_date
            is 2020-09-01, then R_0_ARR[10] would be the R value on 2020-03-11.

        Full description at: https://covid19-projections.com/about/#effective-reproduction-number-r
            and https://covid19-projections.com/model-details/#modeling-the-r-value

        We use three different R values: R0, post-mitigation R, and reopening R.
            We use an inverse logistic/sigmoid function to smooth the transition between
            the three R values.

        To compute the reopen R, we apply a multiplier REOPEN_R_MULT to the lockdown R.
            We map this multiplier to reopen_mult, which assumes greater growth if the
            initial lockdown R is effective.
            e.g. a 10% multiplier takes R=1 to 1.1, but for R=0.7 it gives (2-0.7)**0.5*1.1*0.7 = 0.88 (~25% growth)
            reopen_mult becomes 1 at around R=1.17 (i.e. no increase on reopening)

            Sample code below to compare the difference:
                mult = 1.1
                for lockdown_r in np.arange(0.5,1.21,0.05):
                    orig_reopen_r = mult * lockdown_r
                    reopen_mult = max(1, (2-lockdown_r)**0.5*mult)
                    new_reopen_r = reopen_mult * lockdown_r
                    print(lockdown_r, orig_reopen_r, new_reopen_r)
        """

        assert 1 <= self.REOPEN_R_MULT <= 10, self.REOPEN_R_MULT
        reopen_mult = max(1, (2 - self.LOCKDOWN_R_0)**0.5 * self.REOPEN_R_MULT)
        reopen_r = reopen_mult * self.LOCKDOWN_R_0
        max_post_open_r = self.get_max_post_open_r()
        post_reopening_r = min(max(max_post_open_r, self.LOCKDOWN_R_0),
                               reopen_r)
        assert reopen_r >= self.LOCKDOWN_R_0, 'Reopen R must be >= lockdown R'

        reopen_date_shift = self.REOPEN_DATE + \
            datetime.timedelta(days=int(self.REOPEN_SHIFT_DAYS) + DEFAULT_REOPEN_SHIFT_DAYS)
        fatigue_idx = self.inflection_day_idx + DAYS_UNTIL_LOCKDOWN_FATIGUE
        reopen_idx = self.get_day_idx_from_date(reopen_date_shift)
        lockdown_reopen_midpoint_idx = (self.inflection_day_idx +
                                        reopen_idx) // 2

        if self.LOCKDOWN_R_0 <= 1:
            # we wait longer before applying the post-reopening decay to allow for
            # longer reopening time (since R_t <= 1)
            days_until_post_reopening = 30
        else:
            days_until_post_reopening = 15
        post_reopening_idx = reopen_idx + days_until_post_reopening
        fall_start_idx = self.get_day_idx_from_date(FALL_START_DATE_NORTH)

        sig_lockdown = get_transition_sigmoid(self.inflection_day_idx,
                                              self.RATE_OF_INFLECTION,
                                              self.INITIAL_R_0,
                                              self.LOCKDOWN_R_0)
        sig_fatigue = get_transition_sigmoid(fatigue_idx, 0.2, 1,
                                             self.LOCKDOWN_FATIGUE)
        sig_reopen = get_transition_sigmoid(reopen_idx, 0.2, self.LOCKDOWN_R_0,
                                            post_reopening_r)

        dates = utils.date_range(self.first_date, self.projection_end_date)
        assert len(dates) == self.N

        # how much to drop post_reopening_r R to get to 1 (max 0.9)
        min_post_reopening_total_decay = min(0.9, 1 / post_reopening_r)

        R_0_ARR = [self.INITIAL_R_0]
        for day_idx in range(1, self.N):
            if day_idx < lockdown_reopen_midpoint_idx:
                r_t = sig_lockdown(day_idx)
            else:
                post_reopening_total_decay = fall_r_mult = 1

                if day_idx > post_reopening_idx:
                    assert day_idx > reopen_idx, day_idx
                    post_reopening_total_decay = max(
                        min_post_reopening_total_decay,
                        self.post_reopening_r_decay**(day_idx -
                                                      post_reopening_idx))
                assert 0 < post_reopening_total_decay <= 1, post_reopening_total_decay

                if day_idx > fall_start_idx:
                    fall_r_mult = min(
                        1.1,
                        self.get_fall_r_multiplier()**(day_idx -
                                                       fall_start_idx))
                assert 1 <= fall_r_mult < 2, fall_r_mult

                r_t = sig_reopen(
                    day_idx) * post_reopening_total_decay * fall_r_mult

            r_t *= sig_fatigue(day_idx)

            # Make sure R is stable
            if day_idx > reopen_idx and abs(r_t / R_0_ARR[-1] - 1) > 0.1:
                assert False, f'R changed too quickly: {day_idx} {r_t} {R_0_ARR}'

            R_0_ARR.append(r_t)

        assert len(R_0_ARR) == self.N
        return R_0_ARR
Exemple #35
0
    msg = []
    msg.append('Global first date is: %s' % global_first_date)
    msg.append('Global last date is: %s' % global_last_date)
    unique_reviewer_set = set()

    actives_windows = [
        # (days, (rolling_avg_span, ...))
        # (30, (180, 365)),
        # (7, (30, 180)),
        (7, (90, )),
    ]
    actives = {x: [] for (x, _) in actives_windows}
    rolling_sets = {x: RollingSet(x) for (x, _) in actives_windows}
    actives_avg = {x: defaultdict(list) for (x, _) in actives_windows}

    for date in date_range(global_first_date, global_last_date):
        contribs = contribs_by_date.get(date, set())
        reviews = reviewers_by_date.get(date, set())
        mapped_contribs = set()
        for person in contribs:
            name, email = person.split('<', 1)
            email = '<' + email
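            # e.g. 'Jane Doe <jane@example.com>' splits into name 'Jane Doe '
            # and email '<jane@example.com>' (hypothetical example entry)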
            p = '%s %s' % (map_one_person(person), email)
            if p.lower() in excluded_authors:
                continue
            mapped_contribs.add(name)
        mapped_reviews = set()
        for person in reviews:
            name, email = person.split('<', 1)
            email = '<' + email
            p = '%s %s' % (map_one_person(person), email)
Exemple #36
0
def main(ini_path=None, overwrite_flag=False, delay_time=0, gee_key_file=None,
         max_ready=-1):
    """Compute monthly Tcorr images

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    delay_time : float, optional
        Delay time in seconds between starting export tasks (or checking the
        number of queued tasks, see "max_ready" parameter).  The default is 0.
    gee_key_file : str, None, optional
        Earth Engine service account JSON key file (the default is None).
    max_ready : int, optional
        Maximum number of queued "READY" tasks.  The default is -1, which
        implies no limit to the number of tasks that will be submitted.

    """
    logging.info('\nCompute monthly Tcorr images')

    ini = utils.read_ini(ini_path)

    model_name = 'SSEBOP'
    # model_name = ini['INPUTS']['et_model'].upper()

    tmax_name = ini[model_name]['tmax_source']

    export_id_fmt = 'tcorr_image_{product}_month{month:02d}_cycle{cycle:02d}_test'
    asset_id_fmt = '{coll_id}/{month:02d}_cycle{cycle:02d}'

    tcorr_monthly_coll_id = '{}/{}_monthly_test'.format(
        ini['EXPORT']['export_coll'], tmax_name.lower())

    wrs2_coll_id = 'projects/earthengine-legacy/assets/' \
                   'projects/usgs-ssebop/wrs2_descending_custom'

    if (tmax_name.upper() == 'CIMIS' and
            ini['INPUTS']['end_date'] < '2003-10-01'):
        logging.error(
            '\nCIMIS is not currently available before 2003-10-01, exiting\n')
        sys.exit()
    elif (tmax_name.upper() == 'DAYMET' and
            ini['INPUTS']['end_date'] > '2018-12-31'):
        logging.warning(
            '\nDAYMET is not currently available past 2018-12-31, '
            'using median Tmax values\n')
        # sys.exit()
    # elif (tmax_name.upper() == 'TOPOWX' and
    #         ini['INPUTS']['end_date'] > '2017-12-31'):
    #     logging.warning(
    #         '\nTOPOWX is not currently available past 2017-12-31, '
    #         'using median Tmax values\n')
    #     # sys.exit()

    # Extract the model keyword arguments from the INI
    # Set the property name to lower case and try to cast values to numbers
    model_args = {
        k.lower(): float(v) if utils.is_number(v) else v
        for k, v in dict(ini[model_name]).items()}
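    # e.g. an INI value such as '0.978' is cast to the float 0.978, while
    # non-numeric values are kept as strings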
    # et_reference_args = {
    #     k: model_args.pop(k)
    #     for k in [k for k in model_args.keys() if k.startswith('et_reference_')]}

    logging.info('\nInitializing Earth Engine')
    if gee_key_file:
        logging.info('  Using service account key file: {}'.format(gee_key_file))
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('x', key_file=gee_key_file))
    else:
        ee.Initialize()

    logging.debug('\nTmax properties')
    tmax_source = tmax_name.split('_', 1)[0]
    tmax_version = tmax_name.split('_', 1)[1]
    tmax_coll_id = 'projects/earthengine-legacy/assets/' \
                   'projects/usgs-ssebop/tmax/{}'.format(tmax_name.lower())
    tmax_coll = ee.ImageCollection(tmax_coll_id)
    tmax_mask = ee.Image(tmax_coll.first()).select([0]).multiply(0)
    logging.debug('  Collection: {}'.format(tmax_coll_id))
    logging.debug('  Source: {}'.format(tmax_source))
    logging.debug('  Version: {}'.format(tmax_version))

    # Get the Tcorr daily image collection properties
    logging.debug('\nTcorr Image properties')
    tcorr_daily_coll_id = '{}/{}_daily'.format(
        ini['EXPORT']['export_coll'], tmax_name.lower())
    tcorr_img = ee.Image(ee.ImageCollection(tcorr_daily_coll_id).first())
    tcorr_info = utils.get_info(ee.Image(tcorr_img))
    tcorr_geo = tcorr_info['bands'][0]['crs_transform']
    tcorr_crs = tcorr_info['bands'][0]['crs']
    tcorr_shape = tcorr_info['bands'][0]['dimensions']
    # tcorr_geo = ee.Image(tcorr_img).projection().getInfo()['transform']
    # tcorr_crs = ee.Image(tcorr_img).projection().getInfo()['crs']
    # tcorr_shape = ee.Image(tcorr_img).getInfo()['bands'][0]['dimensions']
    tcorr_extent = [tcorr_geo[2], tcorr_geo[5] + tcorr_shape[1] * tcorr_geo[4],
                    tcorr_geo[2] + tcorr_shape[0] * tcorr_geo[0], tcorr_geo[5]]
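    # tcorr_geo is an affine transform [xScale, xShear, xTranslate, yShear,
    # yScale, yTranslate], so the extent computed above is [xmin, ymin, xmax, ymax]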
    logging.debug('  Shape: {}'.format(tcorr_shape))
    logging.debug('  Extent: {}'.format(tcorr_extent))
    logging.debug('  Geo: {}'.format(tcorr_geo))
    logging.debug('  CRS: {}'.format(tcorr_crs))

    if not ee.data.getInfo(tcorr_monthly_coll_id):
        logging.info('\nExport collection does not exist and will be built'
                     '\n  {}'.format(tcorr_monthly_coll_id))
        input('Press ENTER to continue')
        ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, tcorr_monthly_coll_id)

    # Get current asset list
    logging.debug('\nGetting GEE asset list')
    asset_list = utils.get_ee_assets(tcorr_monthly_coll_id)
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        pprint.pprint(asset_list[:10])

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        logging.debug('  Tasks: {}\n'.format(len(tasks)))
    #     input('ENTER')

    # Limit by year and month
    try:
        month_list = sorted(list(utils.parse_int_set(ini['TCORR']['months'])))
    except Exception:
        logging.info('\nTCORR "months" parameter not set in the INI,'
                     '\n  Defaulting to all months (1-12)\n')
        month_list = list(range(1, 13))
    try:
        year_list = sorted(list(utils.parse_int_set(ini['TCORR']['years'])))
    except Exception:
        logging.info('\nTCORR "years" parameter not set in the INI,'
                     '\n  Defaulting to all available years\n')
        year_list = []

    # Key is cycle day, value is a reference date on that cycle
    # Data from: https://landsat.usgs.gov/landsat_acq
    # I only need to use 8 cycle days because the 5/7 and 7/8 cycles are offset
    cycle_dates = {
        1:  '2000-01-06',
        2:  '2000-01-07',
        3:  '2000-01-08',
        4:  '2000-01-09',
        5:  '2000-01-10',
        6:  '2000-01-11',
        7:  '2000-01-12',
        8:  '2000-01-13',
        # 9:  '2000-01-14',
        # 10: '2000-01-15',
        # 11: '2000-01-16',
        # 12: '2000-01-01',
        # 13: '2000-01-02',
        # 14: '2000-01-03',
        # 15: '2000-01-04',
        # 16: '2000-01-05',
    }
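    # The date filter below keeps dates that are a whole number of 8-day cycles
    # from the reference date, e.g. 2000-01-14 is 8 days after the cycle-day-1
    # reference 2000-01-06 and is therefore grouped with cycle day 1.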

    # Key is cycle day, values are list of paths
    # First list is Landsat 8 paths, second list is Landsat 7 paths
    cycle_paths = {
        5:  [ 1, 17, 33, 49, 65,  81,  97, 106, 122, 138, 154, 170, 186, 202, 218] +
            [ 9, 25, 41, 57, 73,  89,  98, 114, 130, 146, 162, 178, 194, 210, 226],
        # 12: [ 2, 18, 34, 50, 66,  82, 107, 123, 139, 155, 171, 187, 203, 219] +
        #     [10, 26, 42, 58, 74,  99, 115, 131, 147, 163, 179, 195, 211, 227],
        3:  [ 3, 19, 35, 51, 67,  83, 108, 124, 140, 156, 172, 188, 204, 220] +
            [11, 27, 43, 59, 75, 100, 116, 132, 148, 164, 180, 196, 212, 228],
        # 10: [ 4, 20, 36, 52, 68,  84, 109, 125, 141, 157, 171, 189, 205, 221] +
        #     [12, 28, 44, 60, 76, 101, 117, 133, 149, 165, 181, 197, 213, 229],
        1:  [ 5, 21, 37, 53, 69,  85, 110, 126, 142, 158, 174, 190, 206, 222] +
            [13, 29, 45, 61, 77, 102, 118, 134, 150, 166, 182, 198, 214, 230],
        8:  [ 6, 22, 38, 54, 70,  86, 111, 127, 143, 159, 175, 191, 207, 223] +
            [14, 30, 46, 62, 78, 103, 119, 135, 151, 167, 183, 199, 215, 231],
        # 15: [ 7, 23, 39, 55, 71,  87, 112, 128, 144, 160, 176, 192, 208, 224] +
        #     [15, 31, 47, 63, 79, 104, 120, 136, 152, 168, 184, 200, 216, 232],
        6:  [ 8, 24, 40, 56, 72,  88, 113, 129, 145, 161, 177, 193, 209, 225] +
            [16, 32, 48, 64, 80, 105, 121, 137, 153, 169, 185, 201, 217, 233],
        # 13: [ 9, 25, 41, 57, 73,  89,  98, 114, 130, 146, 162, 178, 194, 210, 226] +
        #     [ 1, 17, 33, 49, 65,  81,  90, 106, 122, 138, 154, 170, 186, 202, 218],
        4:  [10, 26, 42, 58, 74,  90,  99, 115, 131, 147, 163, 179, 195, 211, 227] +
            [ 2, 18, 34, 50, 66,  82,  91, 107, 123, 139, 155, 171, 187, 203, 219],
        # 11: [11, 27, 43, 59, 75,  91, 100, 116, 132, 148, 164, 180, 196, 212, 228] +
        #     [ 3, 19, 35, 51, 67,  83,  92, 108, 124, 140, 156, 172, 188, 204, 220],
        2:  [12, 28, 44, 60, 76,  92, 101, 117, 133, 149, 165, 181, 197, 213, 229] +
            [ 4, 20, 36, 52, 68,  84,  93, 109, 125, 141, 157, 173, 189, 205, 221],
        # 9:  [13, 29, 45, 61, 77,  93, 102, 118, 134, 150, 166, 182, 198, 214, 230] +
        #     [ 5, 21, 37, 53, 69,  85,  94, 110, 126, 142, 158, 174, 190, 206, 222],
        # 16: [14, 30, 46, 62, 78,  94, 103, 119, 135, 151, 167, 183, 199, 215, 231] +
        #     [ 6, 22, 38, 54, 70,  86,  95, 111, 127, 143, 159, 175, 191, 207, 223],
        7:  [15, 31, 47, 63, 79,  95, 104, 120, 136, 152, 168, 184, 200, 216, 232] +
            [ 7, 23, 39, 55, 71,  87,  96, 112, 128, 144, 160, 176, 192, 208, 224],
        # 14: [16, 32, 48, 64, 80,  96, 105, 121, 137, 153, 169, 185, 201, 217, 233] +
        #     [ 8, 24, 40, 56, 72,  88,  97, 113, 129, 145, 161, 177, 193, 209, 225],
    }

    # Iterate over date ranges
    for month in month_list:
        logging.info('\nMonth: {}'.format(month))

        for cycle_day, ref_date in sorted(cycle_dates.items()):
            logging.info('Cycle Day: {}'.format(cycle_day))
            # # DEADBEEF
            # if cycle_day not in [2]:
            #     continue

            ref_dt = datetime.datetime.strptime(ref_date, '%Y-%m-%d')
            logging.debug('  Reference Date: {}'.format(ref_date))

            date_list = sorted(list(utils.date_range(
                datetime.datetime(year_list[0], 1, 1),
                datetime.datetime(year_list[-1], 12, 31))))
            date_list = [
                d.strftime('%Y-%m-%d') for d in date_list
                if ((abs(d - ref_dt).days % 8 == 0) and
                    (int(d.month) == month) and
                    (int(d.year) in year_list))]
            logging.debug('  Dates: {}'.format(', '.join(date_list)))

            export_id = export_id_fmt.format(
                product=tmax_name.lower(), month=month, cycle=cycle_day)
            logging.info('  Export ID: {}'.format(export_id))

            asset_id = asset_id_fmt.format(
                coll_id=tcorr_monthly_coll_id, month=month, cycle=cycle_day)
            logging.info('  Asset ID: {}'.format(asset_id))

            if overwrite_flag:
                if export_id in tasks.keys():
                    logging.debug('  Task already submitted, cancelling')
                    ee.data.cancelTask(tasks[export_id]['id'])
                # This is intentionally not an "elif" so that a task can be
                # cancelled and an existing image/file/asset can be removed
                if asset_id in asset_list:
                    logging.debug('  Asset already exists, removing')
                    ee.data.deleteAsset(asset_id)
            else:
                if export_id in tasks.keys():
                    logging.debug('  Task already submitted, exiting')
                    continue
                elif asset_id in asset_list:
                    logging.debug('  Asset already exists, skipping')
                    continue

            wrs2_coll = ee.FeatureCollection(wrs2_coll_id) \
                .filterBounds(tmax_mask.geometry()) \
                .filter(ee.Filter.inList('PATH', cycle_paths[cycle_day]))
            #     .filter(ee.Filter.inList('PATH', [44]))
            #     .filter(ee.Filter.inList('ROW', [32, 33, 34]))

            def wrs2_tcorr(ftr):
                # Build & merge the Landsat collections for the target path/row
                # Time filters are to remove bad (L5) and pre-op (L8) images
                path = ee.Number(ee.Feature(ftr).get('PATH'))
                row = ee.Number(ee.Feature(ftr).get('ROW'))

                l8_coll = ee.ImageCollection('LANDSAT/LC08/C01/T1_RT_TOA') \
                    .filterMetadata('WRS_PATH', 'equals', path) \
                    .filterMetadata('WRS_ROW', 'equals', row) \
                    .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                                    float(ini['INPUTS']['cloud_cover'])) \
                    .filterMetadata('DATA_TYPE', 'equals', 'L1TP') \
                    .filter(ee.Filter.inList('DATE_ACQUIRED', date_list)) \
                    .filter(ee.Filter.gt('system:time_start',
                                         ee.Date('2013-03-24').millis()))
                l7_coll = ee.ImageCollection('LANDSAT/LE07/C01/T1_RT_TOA') \
                    .filterMetadata('WRS_PATH', 'equals', path) \
                    .filterMetadata('WRS_ROW', 'equals', row) \
                    .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                                    float(ini['INPUTS']['cloud_cover'])) \
                    .filterMetadata('DATA_TYPE', 'equals', 'L1TP') \
                    .filter(ee.Filter.inList('DATE_ACQUIRED', date_list))
                l5_coll = ee.ImageCollection('LANDSAT/LT05/C01/T1_TOA') \
                    .filterMetadata('WRS_PATH', 'equals', path) \
                    .filterMetadata('WRS_ROW', 'equals', row) \
                    .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                                    float(ini['INPUTS']['cloud_cover'])) \
                    .filterMetadata('DATA_TYPE', 'equals', 'L1TP')  \
                    .filter(ee.Filter.inList('DATE_ACQUIRED', date_list)) \
                    .filter(ee.Filter.lt('system:time_start',
                                         ee.Date('2011-12-31').millis()))
                l4_coll = ee.ImageCollection('LANDSAT/LT04/C01/T1_TOA') \
                    .filterMetadata('WRS_PATH', 'equals', path) \
                    .filterMetadata('WRS_ROW', 'equals', row) \
                    .filterMetadata('CLOUD_COVER_LAND', 'less_than',
                                    float(ini['INPUTS']['cloud_cover'])) \
                    .filterMetadata('DATA_TYPE', 'equals', 'L1TP') \
                    .filter(ee.Filter.inList('DATE_ACQUIRED', date_list))
                landsat_coll = ee.ImageCollection(
                    l8_coll.merge(l7_coll).merge(l5_coll))
                # landsat_coll = ee.ImageCollection(
                #     l8_coll.merge(l7_coll).merge(l5_coll).merge(l4_coll))

                def tcorr_img_func(image):
                    t_obj = ssebop.Image.from_landsat_c1_toa(
                        ee.Image(image), **model_args)
                    t_stats = ee.Dictionary(t_obj.tcorr_stats) \
                        .combine({'tcorr_value': 0, 'tcorr_count': 0},
                                 overwrite=False)
                    tcorr = ee.Number(t_stats.get('tcorr_value'))
                    count = ee.Number(t_stats.get('tcorr_count'))

                    return tmax_mask.add(ee.Image.constant(tcorr)) \
                        .rename(['tcorr']) \
                        .set({
                            'system:time_start': image.get('system:time_start'),
                            'tcorr': tcorr,
                            'count': count
                        })

                reducer = ee.Reducer.median() \
                    .combine(ee.Reducer.count(), sharedInputs=True)
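                # The combined reducer computes the median and the count of valid
                # tcorr values in one pass; reduce() below yields both outputs,
                # which are renamed to ['tcorr', 'count'].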

                # Compute median monthly value for all images in the WRS2 tile
                wrs2_tcorr_coll = ee.ImageCollection(
                        landsat_coll.map(tcorr_img_func)) \
                    .filterMetadata('count', 'not_less_than',
                                    float(ini['TCORR']['min_pixel_count']))

                wrs2_tcorr_img = wrs2_tcorr_coll.reduce(reducer) \
                    .rename(['tcorr', 'count'])

                # Compute stats from the properties also
                wrs2_tcorr_stats = ee.Dictionary(ee.List(
                    wrs2_tcorr_coll.aggregate_array('tcorr')).reduce(reducer))
                wrs2_tcorr_stats = wrs2_tcorr_stats \
                    .combine({'median': 0, 'count': 0}, overwrite=False)

                return wrs2_tcorr_img \
                    .clip(ftr.geometry()) \
                    .set({
                        'wrs2_tile': path.format('%03d').cat(row.format('%03d')),
                        # 'wrs2_tile': ftr.get('WRS2_TILE'),
                        'tcorr': ee.Number(wrs2_tcorr_stats.get('median')),
                        'count': ee.Number(wrs2_tcorr_stats.get('count')),
                        'index': 1,
                    })

            # Combine WRS2 Tcorr monthly images to a single monthly image
            output_img = ee.ImageCollection(wrs2_coll.map(wrs2_tcorr)) \
                .filterMetadata('count', 'not_less_than',
                                float(ini['TCORR']['min_scene_count'])) \
                .mean() \
                .rename(['tcorr', 'count'])

            output_img = ee.Image([
                    tmax_mask.add(output_img.select(['tcorr'])).double(),
                    tmax_mask.add(output_img.select(['count'])).min(250).uint8()]) \
                .rename(['tcorr', 'count']) \
                .set({
                    # 'system:time_start': utils.millis(iter_start_dt),
                    'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'),
                    'cycle_day': int(cycle_day),
                    'month': int(month),
                    'years': ','.join(map(str, year_list)),
                    'model_name': model_name,
                    'model_version': ssebop.__version__,
                    'tmax_source': tmax_source.upper(),
                    'tmax_version': tmax_version.upper(),
                })

            logging.debug('  Building export task')
            task = ee.batch.Export.image.toAsset(
                image=ee.Image(output_img),
                description=export_id,
                assetId=asset_id,
                crs=tcorr_crs,
                crsTransform='[' + ','.join(list(map(str, tcorr_geo))) + ']',
                dimensions='{0}x{1}'.format(*tcorr_shape),
            )

            logging.debug('  Starting export task')
            utils.ee_task_start(task)

            # Pause before starting the next export task
            utils.delay_task(delay_time, max_ready)
            logging.debug('')
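
A hypothetical way to run this export script directly, using the function signature above; the INI path below is a placeholder, and the original project presumably parses these options from the command line instead.

if __name__ == '__main__':
    # Placeholder INI path; point this at a real SSEBOP Tcorr configuration file.
    main(ini_path='tcorr_monthly.ini', overwrite_flag=False, delay_time=0,
         gee_key_file=None, max_ready=-1)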
Exemple #37
0
def draw_active_contribs_trends(actives_windows, actives, actives_avg,
                                start_date, end_date):
    all_dates = list(date_range(start_date, end_date))
    x_vals = range(len(all_dates))
    len_all_dates = len(all_dates)
    max_yval = 0
    for aw, rolling_avg_windows in actives_windows:
        for r_a_w in rolling_avg_windows:
            pyplot.plot(x_vals,
                        actives_avg[aw][r_a_w][-len_all_dates:],
                        '-',
                        label="%d day avg (of %d day total)" % (r_a_w, aw),
                        linewidth=5)
            max_yval = max(max_yval, *actives_avg[aw][r_a_w][-len_all_dates:])
    pyplot.title('Active contributors (as of %s)' %
                 datetime.datetime.now().date())
    pyplot.ylabel('Contributor Count')
    pyplot.legend(loc='upper left')
    x_tick_locs = []
    x_tick_vals = []
    for i, d in enumerate(all_dates):
        if d in RELEASE_DATES:
            pyplot.axvline(x=i, alpha=0.3, color='#469bcf', linewidth=2)
        if not i % 60:
            x_tick_locs.append(i)
            x_tick_vals.append(d)
    # Add a final tick; label it with the last date only when the previous
    # labelled tick is more than 30 days earlier, to avoid crowded labels.
    if len(all_dates) - x_tick_locs[-1] > 30:
        x_tick_vals.append(all_dates[-1])
    else:
        x_tick_vals.append('')
    x_tick_locs.append(len(all_dates))
    pyplot.xticks(x_tick_locs,
                  x_tick_vals,
                  rotation=30,
                  horizontalalignment='right')
    pyplot.grid(b=True, which='both', axis='both')
    pyplot.xlim(-1, x_tick_locs[-1] + 1)
    pyplot.ylim(0, max_yval + 5)
    ax = pyplot.gca()
    fig = pyplot.gcf()
    fig.set_size_inches(24, 8)
    fig.set_frameon(False)
    fig.savefig('active_contribs.png', bbox_inches='tight', pad_inches=0.25)
    pyplot.close()

    # small version
    window = 90
    for aw, rolling_avg_windows in actives_windows:
        for r_a_w in rolling_avg_windows[:1]:  # the first window configured
            pyplot.plot(x_vals[:window],
                        actives_avg[aw][r_a_w][-window:],
                        '-',
                        label="%d day avg (of %d day total)" % (r_a_w, aw),
                        linewidth=3)
    pyplot.grid(b=False, which='both', axis='both')
    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlim(-1, window + 1)
    ax = pyplot.gca()
    ax.set_frame_on(True)
    # ax.set_facecolor('black')  # change to (24, 24, 24)
    fig = pyplot.gcf()
    fig.set_size_inches(2, 2. / 3)
    fig.savefig('active_contribs_small.png', bbox_inches='tight', pad_inches=0)
    pyplot.close()
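
Several of these examples rely on a date_range helper from their local utils module. Below is a plausible sketch, assuming it returns a list of dates from start to end inclusive; call sites such as len(dates) and list(date_range(...)) above are consistent with this, but each project's own helper may differ.

import datetime

def date_range(start_date, end_date):
    # Hypothetical sketch of the utils helper assumed above: every date from
    # start_date through end_date, inclusive, returned as a list.
    n_days = (end_date - start_date).days + 1
    return [start_date + datetime.timedelta(days=n) for n in range(n_days)]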