def __init__(self, inputs):
        self._cleaning = True
        clean_opts = {'--nocleanup', '-nc'}.intersection(inputs)
        if clean_opts:
            self._cleaning = False
            for opt in clean_opts:
                inputs.remove(opt)
        self._facilitator = self._find_facilitator()

        logger.info("Looking up the available tests.")
        self._loc = path.dirname(path.abspath(__file__))
        patt = re.compile(r'(test_(\w+?)\.in)')
        self._init_state = listdir(self._loc)
        matches = map(patt.match, self._init_state)
        self._input_files = {}
        for m in filter(None, matches):
            self._input_files[m.group(2)] = m.group(1)

        logger.info("Selecting tests to run.")
        self._test_list = []
        if 'all' in inputs:
            self._test_list = list(self._input_files.keys())
        else:
            for inp in inputs:
                if inp in self._input_files.keys():
                    self._test_list.append(inp)
                else:
                    raise InputError('Unrecognized bioagent test: %s' % inp)

        self._trips_handle = None
        self._bioagent_handle = None
        return
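
# Hedged sketch of the discovery step above: files named 'test_<name>.in' are
# mapped to test names via the regex r'(test_(\w+?)\.in)'. The file names in
# the listing below are invented purely for illustration.
import re

_patt = re.compile(r'(test_(\w+?)\.in)')
_example_listing = ['test_parser.in', 'README.md', 'test_grounding.in']
_example_inputs = {m.group(2): m.group(1)
                   for m in map(_patt.match, _example_listing) if m}
# -> {'parser': 'test_parser.in', 'grounding': 'test_grounding.in'}
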
    def run_all_species(SMILEs):
        '''
        Function to run all species models for a SINGLE chemical.
        '''
        if len(SMILEs) > 1:
            raise InputError("Only Accept One SMILEs at a Time")

        cur_path = os.path.dirname(__file__)
        models_dir = os.path.join(cur_path, '..', 'models')
        all_models = [
            d for d in os.listdir(models_dir)
            if os.path.isdir(os.path.join(models_dir, d))
        ]
        species = []
        all_p = defaultdict(list)

        for each_model in all_models:
            species.append(each_model)
            this_qsar = qsar(each_model)
            this_p, this_inside, this_error, this_higher, this_lower = this_qsar.predict(
                SMILEs)
            all_p[each_model] = [
                this_p[0][0], this_inside[0], this_error[0], this_higher[0],
                this_lower[0]
            ]

        df = pd.DataFrame.from_dict(all_p, orient='index')
        df.columns = [
            'Prediction', 'Inside AD', 'Prediction Error', 'Prediction Upper',
            'Prediction Lower'
        ]
        return df
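
# Hedged sketch of the aggregation pattern used above: one dict entry per
# model holding [prediction, inside-AD flag, error, upper, lower], then
# pd.DataFrame.from_dict(..., orient='index') turns the model names into the
# row index. The model names and zero-filled values below are placeholders,
# not real predictions.
import pandas as pd

_placeholder = {'species_a': [0.0, True, 0.0, 0.0, 0.0],
                'species_b': [0.0, False, 0.0, 0.0, 0.0]}
_df = pd.DataFrame.from_dict(_placeholder, orient='index')
_df.columns = ['Prediction', 'Inside AD', 'Prediction Error',
               'Prediction Upper', 'Prediction Lower']
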
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_test.py args_test_json_path "
            "test_json_dir tfrecord_dir vocab_dir")

    # TODO REFACTOR!!!

    args_test_path = sys.argv[1]
    json_dir = sys.argv[2]
    tfrecord_dir = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(vocab_dir,
                                 os.listdir(vocab_dir)[0], 'args.json')

    with open(args_path) as file:
        args_used = json.load(file)

    # load the test-time arguments passed on the command line
    args = load_json(args_test_path)

    dataset = Dataset(

        # keep consistent with the training datasets
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        padding=args_used.get('padding', args['padding']),
        write_bow=args_used.get('write_bow', args['write_bow']),
        write_tfidf=args_used.get('write_tfidf', args['write_tfidf']),
        tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        stemmer=args_used.get('stemmer', args['stemmer']),
        stopwords=args_used.get('stopwords', args['stopwords']),
        preproc=args_used.get('preproc', args.get('preproc', True)),
        vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),

        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),

        # test split only
        train_ratio=0.0,
        valid_ratio=0.0,

        # default in test mode
        json_dir=json_dir,
        tfrecord_dir=tfrecord_dir,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True)
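
# Hedged sketch of the "find the used arguments" fallback that this script and
# the finetune/predict scripts below share: prefer vocab_dir/args.json, else
# fall back to the args.json inside the first entry of vocab_dir. The helper
# name is ours, not part of the original code.
import os

def _locate_args_json(vocab_dir):
    """Return the path of the args.json recorded with the vocabulary."""
    direct = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    if os.path.exists(direct):
        return direct
    return os.path.join(vocab_dir, os.listdir(vocab_dir)[0], 'args.json')
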
    def prct_change(self, rate_name, tag=None, shift=1, resample='D', period_start=None, period_end=None, SaveToDB=False):
        data = self.manager.get_raw_data(RateName=rate_name, Tag=tag)[2][['date', 'float_value']]
        data = data.set_index(data['date'])['float_value']
        indexx = pd.Index(pd.to_datetime(data.index))
        data = pd.DataFrame(data)
        data = data.set_index(indexx)

        if data.shape[1] != 1:
            raise InputError(data, 'Expected a single column of data')

        if resample != 'D':
            # the `how=` callable was removed from pandas resample; take the
            # last observation of each period instead
            data = data.resample(resample).last()
        data = data.pct_change(periods=1)
        if resample == 'W':
            data = data*7/365*100
        elif resample == 'M':
            data = data*30/365*100
        elif resample == 'D':
            data = data*1/365*100

        if SaveToDB:
            category = self.manager.get_raw_data(rate_name)[0][['description', 'name', 'parent_name']]
            rates = self.manager.get_raw_data(rate_name)[1][['category_name', 'name', 'source', 'tag']]

            rate_name = rates.name.values[0]
            col_name = data.columns.values[0]
            # DataFrame.append and get_value were removed in recent pandas;
            # build the history rows as a list of dicts instead
            rateshistory = pd.DataFrame(
                [{'rates_name': rate_name, 'date': idx,
                  'float_value': data.at[idx, col_name],
                  'string_value': None, 'tag': 'PC[{0}]'.format(shift)}
                 for idx in data.index])

            source = rates['source'].values[0]
            self.manager.save_raw_data(category, rates, rateshistory, source)
            try:
                tag = self.manager.session.query(Rates.tag).filter(Rates.name == rate_name).one()
                if tag[0] is None:
                    tag_new = 'PC[{0}]'.format(shift)
                else:
                    tag_new = tag[0] + '|PC[{0}]'.format(shift)
                self.manager.session.query(Rates).filter(Rates.name == rate_name).update({"tag": tag_new})
                self.manager.session.commit()
            except Exception as e:
                self.manager.session.rollback()
                raise e

        return data
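
# Hedged sketch of the core transformation in prct_change, on a synthetic
# daily series instead of data pulled from self.manager: set a DatetimeIndex,
# resample to the last observation of each period, then take the
# period-over-period percent change.
import pandas as pd

_idx = pd.date_range('2020-01-01', periods=10, freq='D')
_series = pd.DataFrame({'float_value': range(1, 11)}, index=_idx)
_weekly = _series.resample('W').last()     # plays the role of the resample step above
_changes = _weekly.pct_change(periods=1)   # first row is NaN by construction
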
def zonStat_selectedArea(inputCSV, hdm_outRasterPath, gfa_outRasterPath,
                         population=0, resolution=100):
    '''
    This function calculates the sum of demand within pixels of the given
    resolution. The pixels also overlay the standard fishnet used by the
    Hotmaps toolbox, since the multiplying factor matches the distances from
    the origin of the standard fishnet. The code assumes a resolution of
    100x100 m for the output.
    The annual building demand must be in kWh/a; the output heat density map
    raster is in MWh/ha.
    '''
    if isinstance(inputCSV, pd.DataFrame):
        ifile = inputCSV
    else:
        if not os.path.isfile(inputCSV):
            raise InputError('The input csv file does not exist!')
        ifile = pd.read_csv(inputCSV)
    demand = ifile['demand'].values
    GFA = ifile['GFA'].values
    GFA_valid = bool(np.sum(GFA))
    X = ifile['X_3035'].values
    Y = ifile['Y_3035'].values
    x0 = resolution * np.floor(np.min(X)/resolution).astype(int)
    y0 = resolution * np.ceil(np.max(Y)/resolution).astype(int)
    rasterOrigin = (x0, y0)
    xIndex = np.floor((X-x0)/resolution).astype(int)
    yIndex = np.floor((y0-Y)/resolution).astype(int)
    xWidth = np.max(xIndex) - np.min(xIndex) + 1
    yWidth = np.max(yIndex) - np.min(yIndex) + 1
    index = xIndex + xWidth * yIndex
    # The number of rows of "index" and "demand" must be equal.
    sortedData = np.asarray(sorted(zip(index, demand), key=lambda x: x[0]))
    sortedData_GFA = np.asarray(sorted(zip(index, GFA), key=lambda x: x[0]))
    unique, counts = np.unique(index, return_counts=True)
    end = np.cumsum(counts)
    st = np.concatenate((np.zeros((1)), end[0:end.size-1]))
    # xIndex and yIndex start from 0. So they should be added by 1
    sumDem = np.zeros((np.max(xIndex)+1)*(np.max(yIndex)+1))
    item_location = 0
    if GFA_valid:
        sumGFA = np.zeros_like(sumDem)
        for item in unique:
            # sum of demand for each index
            startIndex = int(st[item_location])
            endIndex = int(end[item_location])
            sumDem[item] = np.sum(sortedData[startIndex:endIndex, 1])
            sumGFA[item] = np.sum(sortedData_GFA[startIndex:endIndex, 1])
            item_location += 1
    else:
        for item in unique:
            # sum of demand for each index
            startIndex = int(st[item_location])
            endIndex = int(end[item_location])
            sumDem[item] = np.sum(sortedData[startIndex:endIndex, 1])
            item_location += 1
    '''
    xWidth and yWidth below refer to columns and rows, respectively, and
    should not be mistaken for coordinates!
    '''
    # convert kWh/ha to MWh/ha (1 kWh = 10^(-3) MWh)
    sumDem = 0.001 * sumDem.reshape((yWidth, xWidth))
    geo_transform = [rasterOrigin[0], resolution, 0,
                     rasterOrigin[1], 0, -resolution]
    CM19.main(hdm_outRasterPath, geo_transform, str(sumDem.dtype), sumDem)
    abs_heat_demand = np.sum(demand)
    if GFA_valid:
        # gross floor area density map
        sumGFA = sumGFA.reshape((yWidth, xWidth))
        CM19.main(gfa_outRasterPath, geo_transform, str(sumGFA.dtype), sumGFA)
        mean_spec_demand = abs_heat_demand/np.sum(GFA)
    else:
        mean_spec_demand = np.nan
    if population:
        mean_dem_perCapita = abs_heat_demand/float(population)
    else:
        mean_dem_perCapita = np.nan
#     print("Absolute heat demand: %0.1f GWh\a"
#           "Mean heat demand per capita: %0.2f kWh\n"
#           "Mean heat demand per heated surface (ave. specific demand): %0.2f"
#           " kWh/m2"
#           % (abs_heat_demand*10**(-6), mean_dem_perCapita, mean_spec_demand))
    return (abs_heat_demand*10**(-6), mean_dem_perCapita, mean_spec_demand)
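
# Hedged sketch of the gridding trick used above: points are snapped to a
# raster of the given resolution, each point gets a flat cell index
# xIndex + xWidth * yIndex, and demand is summed per cell. The coordinates and
# demand values below are made up for illustration, and np.add.at is used here
# as a compact stand-in for the sort/cumsum loop in the function.
import numpy as np

_res = 100.0
_X = np.array([4051.0, 4072.0, 4230.0])   # the first two points share a cell
_Y = np.array([2910.0, 2945.0, 2830.0])
_demand = np.array([10.0, 5.0, 7.0])      # kWh/a
_x0 = _res * np.floor(_X.min() / _res)
_y0 = _res * np.ceil(_Y.max() / _res)
_xi = np.floor((_X - _x0) / _res).astype(int)
_yi = np.floor((_y0 - _Y) / _res).astype(int)
_xw = _xi.max() - _xi.min() + 1
_flat = _xi + _xw * _yi
_sums = np.zeros((_xi.max() + 1) * (_yi.max() + 1))
np.add.at(_sums, _flat, _demand)          # per-cell demand sums in kWh/a
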
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_finetune.py dataset_name "
            "args_finetune_json_path finetune_json_dir vocab_dir")

    dataset_name = sys.argv[1]
    args_finetune_path = sys.argv[2]
    json_dir = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(vocab_dir, os.listdir(vocab_dir)[0],
                                 'args.json')

    with open(args_path) as file:
        args_used = json.load(file)

    args = load_json(args_finetune_path)

    tfrecord_dir = os.path.join("data/tf/single/", dataset_name)
    # tfrecord_dir_name = \
    #   "min_" + str(args['min_frequency']) + \
    #   "_max_" + str(args['max_frequency']) + \
    #   "_vocab_" + str(args['max_vocab_size']) + \
    #   "_doc_" + str(args['max_document_length']) + \
    #   "_tok_" + args['tokenizer'].replace('_tokenizer', '')
    # tfrecord_dir = os.path.join(tfrecord_dir, tfrecord_dir_name)
    tfrecord_dir_name = os.path.basename(vocab_dir)
    tfrecord_dir = os.path.join(tfrecord_dir, tfrecord_dir_name)

    dataset = Dataset(

        # TODO keep consistent with the training datasets?
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        # padding=args_used.get('padding', args['padding']),
        # write_bow=args_used.get('write_bow', args['write_bow']),
        # write_tfidf=args_used.get('write_tfidf', args['write_tfidf']),
        # tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        # preproc=args_used.get('preproc', args.get('preproc', True)),
        # vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),
        padding=args_used['padding'],
        write_bow=args_used['write_bow'],
        write_tfidf=args_used['write_tfidf'],
        tokenizer_=args_used['tokenizer_'],
        stemmer=args_used['stemmer'],
        stopwords=args_used['stopwords'],
        preproc=args_used['preproc'],
        vocab_all=args_used['vocab_all'],

        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),

        train_ratio=args['train_ratio'],
        valid_ratio=args['valid_ratio'],

        # default in finetune mode
        json_dir=json_dir,
        tfrecord_dir=tfrecord_dir,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True
    )
def main():
    if len(sys.argv) != 5:
        raise InputError(
            "Usage: python write_tfrecords_predict.py dataset_args_path "
            "predict_json_path predict_tf_path vocab_dir")

    dataset_args_path = sys.argv[1]
    predict_json_path = sys.argv[2]
    predict_tf_path = sys.argv[3]
    vocab_dir = sys.argv[4]

    # find the used arguments
    if os.path.exists(os.path.join(os.path.abspath(vocab_dir), 'args.json')):
        args_path = os.path.join(os.path.abspath(vocab_dir), 'args.json')
    else:
        args_path = os.path.join(
            vocab_dir, os.listdir(vocab_dir)[0], 'args.json')

    with open(args_path) as file:
        args_used = json.load(file)

    if not os.path.exists(os.path.dirname(predict_tf_path)):
        make_dir(os.path.dirname(predict_tf_path))

    # args_DATASET.json or args_merged.json which has min_freq, max_freq,
    # max_document_length etc. information, which are used to further build
    # vocabulary

    args = load_json(dataset_args_path)
    print(args)

    dataset = Dataset(

        # keep consistent with the training datasets
        max_document_length=args_used['max_document_length'],
        max_vocab_size=args_used['max_vocab_size_allowed'],
        min_frequency=args_used['min_frequency'],
        max_frequency=args_used['max_frequency'],
        # padding=args_used.get('padding', args.get('padding', False)),
        # write_bow=args_used.get('write_bow', args.get('write_bow', False)),
        # write_tfidf=args_used.get('write_tfidf', args.get('write_tfidf', False)),
        # tokenizer_=args_used.get('tokenizer', args['tokenizer']),
        # preproc=args_used.get('preproc', args.get('preproc', True)),
        # vocab_all=args_used.get('vocab_all', args.get('vocab_all', False)),

        # use new arguments
        padding=args.get('padding', args_used.get('padding', False)),
        write_bow=args.get('write_bow', args_used.get('write_bow', False)),
        write_tfidf=args.get('write_tfidf',
                             args_used.get('write_tfidf', False)),
        tokenizer_=args.get('tokenizer', args_used.get('tokenizer_',
                                                       'lower_tokenizer')),
        stemmer=args.get('stemmer', args_used.get('stemmer', 'porter_stemmer')),
        stopwords=args.get('stopwords', args_used.get('stopwords', 'nltk')),
        preproc=args.get('preproc', args_used.get('preproc', True)),
        vocab_all=args.get('vocab_all', args_used.get('vocab_all', False)),

        # may be different
        text_field_names=args['text_field_names'],
        label_field_name=args['label_field_name'],
        label_type=args.get('label_type', 'int'),

        # default in predict mode
        json_dir=None,
        tfrecord_dir=None,
        vocab_dir=vocab_dir,
        generate_basic_vocab=False,
        vocab_given=True,
        vocab_name='vocab_v2i.json',
        generate_tf_record=True,
        predict_mode=True,
        predict_json_path=predict_json_path,
        predict_tf_path=predict_tf_path
    )
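
# Hedged illustration of the dataset args file consumed above. The keys mirror
# the lookups in this script; every value here is a placeholder, not a real
# configuration.
_example_dataset_args = {
    'text_field_names': ['text'],
    'label_field_name': 'label',
    'label_type': 'int',
    'padding': False,
    'write_bow': False,
    'write_tfidf': False,
    'tokenizer': 'lower_tokenizer',
    'stemmer': 'porter_stemmer',
    'stopwords': 'nltk',
    'preproc': True,
    'vocab_all': False,
}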