def set_last_checked_pypi(value):
    # persist the "last checked PyPI" marker as a single entity in the config table
    table_service = storage.CloudStorageAccount(
        STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY).create_table_service()
    table_service.create_table(CONFIG_TABLENAME)
    table_service.insert_or_replace_entity(
        CONFIG_TABLENAME, 'last_checked_pypi', 'ROWKEY',
        {'value': value})
def get_last_checked_pypi():
    table_service = storage.CloudStorageAccount(
        STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY).create_table_service()
    table_service.create_table(CONFIG_TABLENAME)
    last_checked_pypi = table_service.query_entities(
        CONFIG_TABLENAME, "PartitionKey eq 'last_checked_pypi'")
    if not len(last_checked_pypi):
        return None
    return last_checked_pypi[0].value
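# Hedged usage sketch (not from the original source): exercises the two config
# helpers above, assuming STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY and
# CONFIG_TABLENAME are defined at module level as in the surrounding snippets.
if __name__ == '__main__':
    import datetime

    set_last_checked_pypi(datetime.datetime.utcnow().isoformat())
    # prints the value just stored, or None if the table query returns nothing
    print(get_last_checked_pypi())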
def dump_elbs(year=2016):
    storage_key = settings['azure']['pcs_storage_key']
    account = az.CloudStorageAccount(account_name='pcslive', account_key=storage_key)
    blob_service = account.create_block_blob_service()
    year_ids = elb_repo.get_elb_harvest_year_ids(year=year)
    if not os.path.exists('data/elbs'):
        os.mkdir('data/elbs')
    for idx, elb_year_id in enumerate(year_ids):
        print("downloading elb GIS cells. idx, yearid: ({} of {}), {}".format(
            idx, len(year_ids), elb_year_id))
        crop = gis_repo.get_pps_crop(elb_year_id)
        if 'Corn' not in crop:
            print("found non-corn crop, ignoring: {}".format(crop))
            continue
        # use the harvest layers
        elb_source_layers = [
            b.name for b in list(blob_service.list_blobs('sourcelayers', str(elb_year_id)))
            if any(x in b.name for x in ['_13_', '_14_', '_15_'])]
        elb_harvest_source_layer_name = elb_source_layers[0] if len(elb_source_layers) > 0 else None
        if elb_harvest_source_layer_name is None:
            print("ELB has no harvest layer: {}".format(elb_year_id))
            continue
        blob_zip = blob_service.get_blob_to_bytes('sourcelayers', elb_harvest_source_layer_name)
        vsiz = '/vsimem/{}.zip'.format(uuid.uuid4().hex)  # gdal/ogr requires a .zip extension
        FileFromMemBuffer(vsiz, bytes(blob_zip.content))
        with fiona.Collection(vsiz, vsi='zip') as f:
            shp = GeoDataFrame.from_features(f, crs={'init': 'epsg:4326'})
        elb_points = GeoDataFrame(shp.loc[shp['ELB_ID'] > 0])
        elb_centroids = list(elb_points.centroid)
        pps = gis_repo.processed_layer_shapes_by_year_id(elb_year_id)
        # get pps cells that have an elb
        pps_elb_cells = DataFrame(
            pps.loc[pps['geometry'].apply(lambda x: any(x.intersects(c) for c in elb_centroids))])
        pps_elb_cells.drop(['geometry'], inplace=True, axis=1)
        # load weather record
        wx = gis_repo.weather_by_year_id(elb_year_id)
        pps_elb_cells = pandas.concat([
            pps_elb_cells,
            pandas.DataFrame([wx.values], index=pps_elb_cells.index, columns=wx.keys())],
            axis=1)
        pps_elb_cells.to_pickle(f'data/elbs/{elb_year_id}_elb.pickle.gz', compression='gzip')
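# Hedged companion sketch (not in the original): read the per-ELB pickles that
# dump_elbs() writes under data/elbs/ back into one DataFrame. The glob pattern
# simply mirrors the f'{elb_year_id}_elb.pickle.gz' naming used above.
import glob

import pandas


def load_dumped_elbs(path='data/elbs'):
    frames = [pandas.read_pickle(p, compression='gzip')
              for p in sorted(glob.glob('{}/*_elb.pickle.gz'.format(path)))]
    return pandas.concat(frames, ignore_index=True) if frames else pandas.DataFrame()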
import requests
import config
from azure import storage
from PackageInformationWorker.PyPIPackageInformation import PyPIPackageInformation
import json
import azure.storage.queue as queue
import traceback
import urllib
import logging

logger = logging.getLogger()
account_name = config.STORAGE_ACCOUNT_NAME
account_key = config.STORAGE_ACCOUNT_KEY
STATIC_ROW_KEY = 'ROWKEY'
table_service = storage.CloudStorageAccount(
    account_name, account_key).create_table_service()
table_service.create_table(config.PACKAGE_VERSION_DATA_TABLENAME)
table_service.create_table(config.PACKAGE_SUMMARY_TABLENAME)


def main():
    # package, version = ('azure', '1.0.0')
    # get a package to look at
    # check that package and version.
    # version data just gets filled in
    # summary trickier.
    # summary -> name,
    #     first_published (might be different than python2_start if
    #                      not using trove classifier)
    #     python2_start (change if we find earlier),
    #     python2_end (change if we find earlier, remove if package
#!/usr/bin/python
import azure.storage as azs

account = azs.CloudStorageAccount(
    account_name='hyperfineml',
    account_key='KQBYI5jV0nPinp/uheXZzqtdrGPKfoRGOoRsssPiLQxa/M+mvogXAf10SBEa8YuIbtMfly33nNXTtmISJoH3vg=='
)
service = account.create_block_blob_service()
service.create_blob_from_path('zipped', 'iq.txt.bz2', './iq.txt.bz2')
storage_sas = settings['azure']['pcs_storage_sas']
storage_key = settings['azure']['pcs_storage_key']

# load training data and train et model
train_df = pcs_data_loader.load_corn_data_frame()
train_cols = set(train_df.keys())
y = train_df['Dry_Yield']
X = train_df.drop(['Dry_Yield', 'Area'], axis=1)
scaler = StandardScaler()
scaler.fit(X)
extra_trees = ExtraTreesRegressor(n_jobs=-1, verbose=True, n_estimators=45)
extra_trees.fit(scaler.transform(X), y)

# account = az.CloudStorageAccount(account_name='pcslive', sas_token=storage_sas)
account = az.CloudStorageAccount(account_name='pcslive', account_key=storage_key)
blob_service = account.create_block_blob_service()

year_ids = elb_repo.get_elb_harvest_year_ids(year=2016)
results = []
for idx, year_id in enumerate(year_ids):
    print("running elb prediction comparison. idx, yearid: ({} of {}), {}".format(
        idx, len(year_ids), year_id))
    crop = gis_repo.get_pps_crop(year_id)
    if 'Corn' not in crop:
        print("found non-corn crop, ignoring: {}".format(crop))
        continue
    # use the indexed layer to find PL cells that are part of the ELB(s)
    # indexed layer is source layer ID 19
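# Hedged sketch (not the original loop body, which is truncated above): once a
# per-ELB feature frame has been assembled, the trained scaler + extra_trees
# pair would typically be applied like this. `elb_features` is a hypothetical
# DataFrame carrying the same feature columns as X.
def predict_dry_yield(elb_features):
    aligned = elb_features[list(X.columns)]  # keep the training column order
    return extra_trees.predict(scaler.transform(aligned))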
def __init__(self, storage_account_name, storage_key):
    # constructor of a blob-storage wrapper class: keeps both the account object
    # and a block blob service built from it
    self.client = storage.CloudStorageAccount(storage_account_name, storage_key)
    self.service = self.client.create_block_blob_service()
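# Hedged usage sketch: wrapping the constructor above in a class so it can be
# instantiated. `BlobStorageClient` and the credential strings are illustrative
# names, not from the original source.
from azure import storage


class BlobStorageClient:
    def __init__(self, storage_account_name, storage_key):
        self.client = storage.CloudStorageAccount(storage_account_name, storage_key)
        self.service = self.client.create_block_blob_service()


client = BlobStorageClient('my_account_name', 'my_account_key')
container_names = [c.name for c in client.service.list_containers()]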