def StateThread():
    global initializeNow
    global isPrimary
    while 1:
        try:
            currentHost = socket.gethostname()
            blob_service = BlobService(account_name=azureStorageAccountName,
                                       account_key=azureStorageAccountKey)
            if initializeNow:
                initializeNow = False
                print("Initializing '" + currentHost + "' as primary.")
                newContents = currentHost
                blob_service.create_container(container)
                blob_service.put_block_blob_from_text(container, blob, newContents)
            while 1:
                print("Downloading current state.")
                currentContents = blob_service.get_blob_to_text(container, blob)
                if currentContents == currentHost:
                    isPrimary = True
                    print("isPrimary = True")
                    # Status received; start the second thread if it is not already running.
                    if not t2.isAlive():
                        t2.start()
                elif currentContents != currentHost and len(currentContents) > 0:
                    isPrimary = False
                    print("isPrimary = False")
                    # Status received; start the second thread if it is not already running.
                    if not t2.isAlive():
                        t2.start()
                sleep(.1)
        except Exception as e:
            print("Error in MainStateThread: " + str(e))
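# For context, a minimal sketch of how StateThread and the t2 worker thread it
# references might be wired together. This is an assumption, not part of the
# original snippet: the worker body and initial flag values are hypothetical,
# and the account/container globals are taken from the module-level script below.
import threading
from time import sleep

initializeNow = True   # True on the node that should claim primary at startup
isPrimary = False

def workerThread():
    # Hypothetical worker gated on the isPrimary flag that StateThread maintains.
    while 1:
        if isPrimary:
            pass  # primary-only work goes here
        sleep(.1)

t2 = threading.Thread(target=workerThread)
threading.Thread(target=StateThread).start()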
def upload_from_text(container, content):
    filename = str(uuid.uuid4())
    blob_service = BlobService(account_name=config.AZURE_STORAGE_NAME,
                               account_key=config.AZURE_STORAGE_KEY)
    try:
        blob_service.put_block_blob_from_text(container, filename, content)
        return generate_blob_url(container, filename)
    except Exception:
        # Swallow upload errors and signal failure with an empty URL.
        return ""
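# A hedged usage sketch for upload_from_text. The config module and the
# generate_blob_url helper are assumed to exist in the surrounding project
# (generate_blob_url presumably builds the blob's https URL); the container
# name and content here are placeholders.
url = upload_from_text('mycontainer', 'hello, blob storage')
if url:
    print("Uploaded to " + url)
else:
    print("Upload failed")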
def _getUrlForTestFile(cls, size=None):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    fileName = 'testfile_%s' % uuid.uuid4()
    containerName = cls._externalStore()
    url = 'wasb://%s@%s.blob.core.windows.net/%s' % (containerName, cls.accountName, fileName)
    if size is None:
        return url
    blobService = BlobService(account_key=_fetchAzureAccountKey(cls.accountName),
                              account_name=cls.accountName)
    content = os.urandom(size)
    blobService.put_block_blob_from_text(containerName, fileName, content)
    return url, hashlib.md5(content).hexdigest()
class azure_storage_writer(object):
    """Storage operation wrapper, designed for writing logs to storage."""

    def __init__(self, account_name, account_key, container, prefix):
        self._blob = BlobService(account_name=account_name, account_key=account_key)
        self._cur_path = None
        self._buf = io.StringIO()
        self._prefix = prefix
        self._container = container
        self._blob.create_container(container)
        self._logger = create_timed_rotating_log()

    def write_log(self, entity):
        path = self._get_path(entity[0])
        if self._cur_path is None:
            self._cur_path = path
        elif self._cur_path != path:
            # The entry falls into a new time bucket: flush the current
            # buffer to blob storage and start a fresh one.
            self._dump_buf_to_storage()
            self._buf.close()
            self._buf = io.StringIO()
            self._cur_path = path
        self._buf.write(entity[1])
        self._buf.write("\n")

    def close(self):
        if self._cur_path is not None:
            self._dump_buf_to_storage()
        self._buf.close()

    def _dump_buf_to_storage(self):
        self._logger.info("Begin dump to azure blob")
        loop = 0
        while True:
            try:
                self._blob.put_block_blob_from_text(self._container, self._cur_path,
                                                    self._buf.getvalue())
                break
            except AzureHttpError as e:
                # Transient HTTP errors are retried up to three times.
                self._logger.warn("Hit an AzureHttpError " + str(e))
                self._logger.warn("Retry times: {0}".format(loop))
                loop += 1
                if loop >= 3:
                    raise e
            except Exception as e:
                self._logger.warn("Hit an Exception " + str(e))
                raise e
        self._logger.info("Dump to azure blob succeeded.")

    def _get_path(self, timestamp):
        # Bucket log lines into one blob per five-minute window.
        d = datetime.fromtimestamp(int(timestamp))
        part = "logs-part-{}.txt".format(d.minute // 5)
        path_str = d.strftime('%Y-%m-%d/%H')
        return "{}/{}/{}".format(self._prefix, path_str, part)
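# A minimal usage sketch for azure_storage_writer, assuming write_log takes
# (unix_timestamp, line) tuples as the code above suggests; the account
# details, container, and prefix are placeholders.
import time

writer = azure_storage_writer(account_name='myaccount', account_key='mykey',
                              container='logs', prefix='app1')
writer.write_log((time.time(), "service started"))
writer.write_log((time.time(), "heartbeat ok"))
writer.close()  # flushes the remaining buffer to blob storage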
def _prepareTestFile(self, containerName, size=None):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    from azure.storage.blob import BlobService
    fileName = 'testfile_%s' % uuid.uuid4()
    url = 'wasb://%s@%s.blob.core.windows.net/%s' % (containerName, self.accountName, fileName)
    if size is None:
        return url
    blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                              account_name=self.accountName)
    content = os.urandom(size)
    blobService.put_block_blob_from_text(containerName, fileName, content)
    return url, hashlib.md5(content).hexdigest()
def main(account_name, account_key):
    sc = SparkContext()
    sqlContext = SQLContext(sc)
    patient_records_container = 'patientrecords'
    glucose_levels_container = 'glucoselevelsaggs'
    preds_container = 'predictions'

    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blob_service.create_container(preds_container)

    day_to_predict = get_most_recent_date(blob_service, glucose_levels_container)
    df = get_df_from_blob(blob_service, glucose_levels_container,
                          patient_records_container, day_to_predict)

    # Load the fitted feature pipelines and the trained classifier from blob storage.
    project_path = 'wasb://model@{}.blob.core.windows.net/{}'
    si_pipe_model = PipelineModel.read().load(path=project_path.format(account_name, 'si_pipe_model'))
    oh_pipe_model = PipelineModel.read().load(path=project_path.format(account_name, 'oh_pipe_model'))
    model = RandomForestClassificationModel.read().load(path=project_path.format(account_name, 'model'))

    df_spark = sqlContext.createDataFrame(df)
    df_preds = si_pipe_model.transform(df_spark)
    df_preds = oh_pipe_model.transform(df_preds)

    num_var_names = ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications',
                     'number_outpatient', 'number_emergency', 'number_inpatient', 'diag_1', 'diag_2',
                     'diag_3', 'number_diagnoses', 'glucose_min', 'glucose_max', 'glucose_mean',
                     'glucose_var']
    cat_var_names = ['race', 'gender', 'age', 'weight', 'admission_type_id',
                     'discharge_disposition_id', 'admission_source_id', 'payer_code',
                     'medical_specialty', 'max_glu_serum', 'A1Cresult', 'metformin', 'repaglinide',
                     'nateglinide', 'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
                     'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone', 'acarbose',
                     'miglitol', 'troglitazone', 'tolazamide', 'insulin', 'glyburide-metformin',
                     'glipizide-metformin', 'glimepiride-pioglitazone', 'metformin-rosiglitazone',
                     'metformin-pioglitazone', 'change', 'diabetesMed', 'diag_1_missing',
                     'diag_2_missing', 'diag_3_missing', 'race_missing', 'weight_missing',
                     'payer_code_missing', 'medical_specialty_missing']
    va = VectorAssembler(inputCols=(num_var_names + [c + "__encoded__" for c in cat_var_names]),
                         outputCol='features')
    df_preds = va.transform(df_preds).select('features')
    df_preds = model.transform(df_preds)
    df_preds_pandas = df_preds.toPandas()
    # Keep the positive-class probability alongside the patient id and discharge date.
    df_preds_pandas = pd.concat([df[['patient_nbr', 'discharge_date']],
                                 df_preds_pandas['probability'].map(lambda x: x[1])], axis=1)

    # Save the predictions
    blob_service.put_block_blob_from_text(blob_name='-'.join(str(day_to_predict).split('/')) + '.csv',
                                          container_name=preds_container,
                                          text=df_preds_pandas.to_csv(index=False))
    return
from azure.storage.blob import BlobService
import socket
import sys

azureStorageAccountName = "removed"
azureStorageAccountKey = "removed"
container = "ilbcp1"
blob = "currentprimary.dat"

retryCount = 0
while 1:  # keep main thread running
    try:
        print("Started.")
        currentHost = socket.gethostname()
        print("Setting as primary...")
        blob_service = BlobService(account_name=azureStorageAccountName,
                                   account_key=azureStorageAccountKey)
        newContents = currentHost
        blob_service.create_container(container)
        blob_service.put_block_blob_from_text(container, blob, newContents)
        print("Done.")
        sys.exit()
    except Exception as e:
        print("Exception: " + str(e))
        retryCount = retryCount + 1
        if retryCount > 5:
            print("Permanently failed.")
            sys.exit()