def __init__(self, inputs):
    self._cleaning = True
    clean_opts = {'--nocleanup', '-nc'}.intersection(inputs)
    if len(clean_opts) > 0:
        self._cleaning = False
        for opt in clean_opts:
            inputs.remove(opt)
    self._facilitator = self._find_facilitator()

    logger.info("Looking up the available tests.")
    self._loc = path.dirname(path.abspath(__file__))
    patt = re.compile(r'(test_(\w+?)\.in)')
    self._init_state = listdir(self._loc)
    matches = map(patt.match, self._init_state)
    self._input_files = {}
    for m in filter(lambda x: x is not None, matches):
        # groups()[1] is the test name, groups()[0] the full input file name.
        self._input_files[m.groups()[1]] = m.groups()[0]

    logger.info("Selecting tests to run.")
    self._test_list = []
    if 'all' in inputs:
        self._test_list = list(self._input_files.keys())
    else:
        for inp in inputs:
            if inp in self._input_files.keys():
                self._test_list.append(inp)
            else:
                raise InputError('Unrecognized bioagent test: %s' % inp)

    self._trips_handle = None
    self._bioagent_handle = None
    return
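# The InputError raised throughout these functions is not defined in this
# section. A minimal sketch of what it is assumed to be (a plain Exception
# subclass); the real definition may live elsewhere in the project and may
# carry extra fields:
class InputError(Exception):
    """Raised when user-supplied input (CLI arguments, test names, data) is invalid."""
    pass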
def run_all_species(SMILEs):
    '''Run every species model for a SINGLE chemical (one SMILES at a time).'''
    if len(SMILEs) > 1:
        raise InputError("Only accepts one SMILES at a time")
    cur_path = os.path.dirname(__file__)
    models_dir = os.path.join(cur_path, '..', 'models')
    all_models = [
        d for d in os.listdir(models_dir)
        if os.path.isdir(os.path.join(models_dir, d))
    ]
    species = []
    all_p = defaultdict(list)
    for each_model in all_models:
        species.append(each_model)
        this_qsar = qsar(each_model)
        this_p, this_inside, this_error, this_higher, this_lower = \
            this_qsar.predict(SMILEs)
        all_p[each_model] = [
            this_p[0][0], this_inside[0], this_error[0],
            this_higher[0], this_lower[0]
        ]
    df = pd.DataFrame.from_dict(all_p, orient='index')
    df.columns = [
        'Prediction', 'Inside AD', 'Prediction Error',
        'Prediction Upper', 'Prediction Lower'
    ]
    return df
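# Hypothetical usage sketch for run_all_species. The SMILES value and the
# presence of per-species model folders under ../models are assumptions, not
# part of the original code. A single-element list satisfies the
# len(SMILEs) > 1 guard above.
result = run_all_species(['CCO'])  # 'CCO' = ethanol, used only as an example
print(result[['Prediction', 'Inside AD', 'Prediction Error']])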
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_test.py args_test_json_path "
            "test_json_dir tfrecord_dir vocab_dir")

    # TODO REFACTOR!!!
    args_test_path = sys.argv[1]
    json_dir = sys.argv[2]
    tfrecord_dir = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(vocab_dir, os.listdir(vocab_dir)[0],
                                 'args.json')
    with open(args_path) as file:
        args_used = json.load(file)

    # the (possibly different) test-set arguments passed on the command line
    args = load_json(args_test_path)

    dataset = Dataset(
        # keep consistent with the training datasets
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        padding=args_used.get('padding', args['padding']),
        write_bow=args_used.get('write_bow', args['write_bow']),
        write_tfidf=args_used.get('write_tfidf', args['write_tfidf']),
        tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        stemmer=args_used.get('stemmer', args['stemmer']),
        stopwords=args_used.get('stopwords', args['stopwords']),
        preproc=args_used.get('preproc', args.get('preproc', True)),
        vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),
        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),
        # test split only
        train_ratio=0.0,
        valid_ratio=0.0,
        # default in test mode
        json_dir=json_dir,
        tfrecord_dir=tfrecord_dir,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True)
def prct_change(self, rate_name, tag=None, shift=1, resample='D',
                period_start=None, period_end=None, SaveToDB=False):
    data = self.manager.get_raw_data(RateName=rate_name,
                                     Tag=tag)[2][['date', 'float_value']]
    data = data.set_index(data['date'])['float_value']
    indexx = pd.Index(pd.to_datetime(data.index))
    data = pd.DataFrame(data)
    data = data.set_index(indexx)
    if data.shape[1] != 1:
        raise InputError(data, 'Shape more than 1')
    if resample != 'D':
        # keep the last observation of each resampling period
        data = data.resample(resample, how=lambda x: x[-1])
    data = data.pct_change(periods=1)
    # annualize the percentage change (in percent) for the chosen frequency
    if resample == 'W':
        data = data * 7 / 365 * 100
    elif resample == 'M':
        data = data * 30 / 365 * 100
    elif resample == 'D':
        data = data * 1 / 365 * 100
    if SaveToDB:
        category = self.manager.get_raw_data(rate_name)[0][
            ['description', 'name', 'parent_name']]
        rates = self.manager.get_raw_data(rate_name)[1][
            ['category_name', 'name', 'source', 'tag']]
        rateshistory = pd.DataFrame()
        rate_name = rates.name.values[0]
        col_name = data.columns.values[0]
        for idx in data.index:
            rateshistory = rateshistory.append(
                {'rates_name': rate_name,
                 'date': idx,
                 'float_value': data.get_value(idx, col_name),
                 'string_value': None,
                 'tag': 'PC[{0}]'.format(shift)},
                ignore_index=True)
        source = rates['source'].values[0]
        self.manager.save_raw_data(category, rates, rateshistory, source)
        try:
            tag = self.manager.session.query(Rates.tag).filter(
                Rates.name == rate_name).one()
            if tag[0] is None:
                tag_new = 'PC[{0}]'.format(shift)
            else:
                tag_new = tag[0] + '|PC[{0}]'.format(shift)
            self.manager.session.query(Rates).filter(
                Rates.name == rate_name).update({"tag": tag_new})
            self.manager.session.commit()
        except Exception as e:
            self.manager.session.rollback()
            raise e
        return data
    else:
        return data
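# Illustration of the resample / pct_change / annualization step used above on
# a standalone pandas Series. The dates and values below are made up for the
# sketch; they do not come from the rates database.
import pandas as pd

idx = pd.date_range('2020-01-01', periods=90, freq='D')
series = pd.Series(range(100, 190), index=idx, dtype=float)
monthly = series.resample('M').last()        # keep the last value per month
monthly_pc = monthly.pct_change(periods=1)   # month-over-month change
annualized = monthly_pc * 30 / 365 * 100     # same scaling as the 'M' branch
print(annualized)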
def zonStat_selectedArea(inputCSV, hdm_outRasterPath, gfa_outRasterPath,
                         population=0, resolution=100):
    '''
    This function calculates the sum of demand within pixels of the given
    resolution. The pixels also overlay the standard fishnet used for the
    Hotmaps toolbox, since the multiplying factor matches the distances from
    the origin of the standard fishnet. The code assumes a resolution of
    100x100 m for the output.

    Annual building demand must be in kWh/a.
    The output heat density map raster is in MWh/ha.
    '''
    if isinstance(inputCSV, pd.DataFrame):
        ifile = inputCSV
    else:
        if not os.path.isfile(inputCSV):
            raise InputError('The input csv file does not exist!')
        ifile = pd.read_csv(inputCSV)
    demand = ifile['demand'].values
    GFA = ifile['GFA'].values
    if np.sum(GFA):
        GFA_valid = True
    else:
        GFA_valid = False
    X = ifile['X_3035'].values
    Y = ifile['Y_3035'].values
    x0 = resolution * np.floor(np.min(X) / resolution).astype(int)
    y0 = resolution * np.ceil(np.max(Y) / resolution).astype(int)
    rasterOrigin = (x0, y0)
    xIndex = np.floor((X - x0) / resolution).astype(int)
    yIndex = np.floor((y0 - Y) / resolution).astype(int)
    xWidth = np.max(xIndex) - np.min(xIndex) + 1
    yWidth = np.max(yIndex) - np.min(yIndex) + 1
    index = xIndex + xWidth * yIndex
    # The number of rows of "index" and "demand" must be equal.
    sortedData = np.asarray(sorted(zip(index, demand), key=lambda x: x[0]))
    sortedData_GFA = np.asarray(sorted(zip(index, GFA), key=lambda x: x[0]))
    unique, counts = np.unique(index, return_counts=True)
    end = np.cumsum(counts)
    st = np.concatenate((np.zeros((1)), end[0:end.size - 1]))
    # xIndex and yIndex start from 0, so their maxima must be increased by 1.
    sumDem = np.zeros((np.max(xIndex) + 1) * (np.max(yIndex) + 1))
    item_location = 0
    if GFA_valid:
        sumGFA = np.zeros_like(sumDem)
        for item in unique:
            # sum of demand and GFA for each index
            startIndex = int(st[item_location])
            endIndex = int(end[item_location])
            sumDem[item] = np.sum(sortedData[startIndex:endIndex, 1])
            sumGFA[item] = np.sum(sortedData_GFA[startIndex:endIndex, 1])
            item_location += 1
    else:
        for item in unique:
            # sum of demand for each index
            startIndex = int(st[item_location])
            endIndex = int(end[item_location])
            sumDem[item] = np.sum(sortedData[startIndex:endIndex, 1])
            item_location += 1
    '''
    xWidth and yWidth in the following refer to columns and rows,
    respectively, and should not wrongly be interpreted as coordinates!
    '''
    # 1 kWh/ha = 10^(-3) MWh/ha: convert the demand raster from kWh to MWh
    sumDem = 0.001 * sumDem.reshape((yWidth, xWidth))
    geo_transform = [rasterOrigin[0], resolution, 0,
                     rasterOrigin[1], 0, -resolution]
    CM19.main(hdm_outRasterPath, geo_transform, str(sumDem.dtype), sumDem)
    abs_heat_demand = np.sum(demand)
    if GFA_valid:
        # gross floor area density map
        sumGFA = sumGFA.reshape((yWidth, xWidth))
        CM19.main(gfa_outRasterPath, geo_transform, str(sumGFA.dtype), sumGFA)
        mean_spec_demand = abs_heat_demand / np.sum(GFA)
    else:
        mean_spec_demand = np.nan
    if population:
        mean_dem_perCapita = abs_heat_demand / float(population)
    else:
        mean_dem_perCapita = np.nan
    # print("Absolute heat demand: %0.1f GWh\n"
    #       "Mean heat demand per capita: %0.2f kWh\n"
    #       "Mean heat demand per heated surface (ave. specific demand): %0.2f"
    #       " kWh/m2"
    #       % (abs_heat_demand * 10**(-6), mean_dem_perCapita,
    #          mean_spec_demand))
    return (abs_heat_demand * 10**(-6), mean_dem_perCapita, mean_spec_demand)
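# Hypothetical usage sketch for zonStat_selectedArea: a tiny in-memory
# DataFrame with the columns the function expects (demand in kWh/a, GFA in m2,
# coordinates in EPSG:3035). The values and output raster paths below are
# placeholders, and the sketch assumes the CM19 raster writer is importable.
sample = pd.DataFrame({
    'demand': [12000.0, 8000.0, 15000.0],        # kWh/a per building
    'GFA':    [150.0, 90.0, 200.0],              # gross floor area in m2
    'X_3035': [4321050.0, 4321120.0, 4321310.0],
    'Y_3035': [3210080.0, 3210010.0, 3209950.0],
})
totals = zonStat_selectedArea(sample, 'hdm_out.tif', 'gfa_out.tif',
                              population=10, resolution=100)
# returns (total demand in GWh, demand per capita, mean specific demand)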
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_finetune.py dataset_name "
            "args_finetune_json_path finetune_json_dir vocab_dir")

    dataset_name = sys.argv[1]
    args_finetune_path = sys.argv[2]
    json_dir = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(vocab_dir, os.listdir(vocab_dir)[0],
                                 'args.json')
    with open(args_path) as file:
        args_used = json.load(file)

    args = load_json(args_finetune_path)

    tfrecord_dir = os.path.join("data/tf/single/", dataset_name)
    # tfrecord_dir_name = \
    #     "min_" + str(args['min_frequency']) + \
    #     "_max_" + str(args['max_frequency']) + \
    #     "_vocab_" + str(args['max_vocab_size']) + \
    #     "_doc_" + str(args['max_document_length']) + \
    #     "_tok_" + args['tokenizer'].replace('_tokenizer', '')
    # tfrecord_dir = os.path.join(tfrecord_dir, tfrecord_dir_name)
    tfrecord_dir_name = os.path.basename(vocab_dir)
    tfrecord_dir = os.path.join(tfrecord_dir, tfrecord_dir_name)

    dataset = Dataset(
        # TODO keep consistent with the training datasets?
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        # padding=args_used.get('padding', args['padding']),
        # write_bow=args_used.get('write_bow', args['write_bow']),
        # write_tfidf=args_used.get('write_tfidf', args['write_tfidf']),
        # tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        # preproc=args_used.get('preproc', args.get('preproc', True)),
        # vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),
        padding=args_used['padding'],
        write_bow=args_used['write_bow'],
        write_tfidf=args_used['write_tfidf'],
        tokenizer_=args_used['tokenizer_'],
        stemmer=args_used['stemmer'],
        stopwords=args_used['stopwords'],
        preproc=args_used['preproc'],
        vocab_all=args_used['vocab_all'],
        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),
        train_ratio=args['train_ratio'],
        valid_ratio=args['train_ratio'],
        # default in finetune mode
        json_dir=json_dir,
        tfrecord_dir=tfrecord_dir,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True
    )
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_predict.py dataset_args_path "
            "predict_json_path predict_tf_path vocab_dir")

    dataset_args_path = sys.argv[1]
    predict_json_path = sys.argv[2]
    predict_tf_path = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(
            vocab_dir, os.listdir(vocab_dir)[0], 'args.json')
    with open(args_path) as file:
        args_used = json.load(file)

    if not os.path.exists(os.path.dirname(predict_tf_path)):
        make_dir(os.path.dirname(predict_tf_path))

    # args_DATASET.json or args_merged.json, which has the min_freq, max_freq,
    # max_document_length etc. information used to further build the
    # vocabulary
    args = load_json(dataset_args_path)
    print(args)

    dataset = Dataset(
        # keep consistent with the training datasets
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        # padding=args_used.get('padding', args.get('padding', False)),
        # write_bow=args_used.get('write_bow', args.get('write_bow', False)),
        # write_tfidf=args_used.get('write_tfidf', args.get('write_tfidf', False)),
        # tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        # preproc=args_used.get('preproc', args.get('preproc', True)),
        # vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),
        # use new arguments
        padding=args.get('padding', args_used.get('padding', False)),
        write_bow=args.get('write_bow', args_used.get('write_bow', False)),
        write_tfidf=args.get('write_tfidf',
                             args_used.get('write_tfidf', False)),
        tokenizer_=args.get('tokenizer',
                            args_used.get('tokenizer_', 'lower_tokenizer')),
        stemmer=args.get('stemmer',
                         args_used.get('stemmer', 'porter_stemmer')),
        stopwords=args.get('stopwords', args_used.get('stopwords', 'nltk')),
        preproc=args.get('preproc', args_used.get('preproc', True)),
        vocab_all=args.get('vocab_all', args_used.get('vocab_all', False)),
        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),
        # default in predict mode
        json_dir=None,
        tfrecord_dir=None,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True,
        predict_mode=True,
        predict_json_path=predict_json_path,
        predict_tf_path=predict_tf_path
    )
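# The args.json lookup above is repeated in all three write_tfrecords scripts:
# prefer vocab_dir/args.json, otherwise fall back to the first subdirectory's
# args.json. A minimal sketch of that lookup in isolation (the helper name is
# hypothetical and not part of the original code):
def _find_used_args(vocab_dir):
    """Return the path of the args.json that was used to build vocab_dir."""
    direct = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    if os.path.exists(direct):
        return direct
    # fall back to the first entry inside vocab_dir
    return os.path.join(vocab_dir, os.listdir(vocab_dir)[0], 'args.json')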