Example #1
    def test_empty_csv(self, tmpdir):
        file_name = 'empty.csv'
        csv_file = tmpdir.join(file_name)
        csv_file.ensure()

        with pytest.raises(Exception):
            utils.load_table(str(csv_file), 'empty')
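
Note: the test above only asserts that utils.load_table raises on an empty CSV; the utils module itself is not shown on this page. A minimal sketch of a loader with compatible behavior, assuming a pandas/SQLAlchemy implementation (names and defaults here are illustrative):

import pandas as pd
import sqlalchemy as sa

def load_table(filename, tablename, engine=None):
    # pandas raises EmptyDataError (a ValueError subclass) for a CSV with
    # no columns, which is what pytest.raises(Exception) catches above
    engine = engine or sa.create_engine('sqlite://')  # in-memory default
    frame = pd.read_csv(filename)
    frame.to_sql(tablename, con=engine, if_exists='replace', index=False)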
Example #2
    def load_table(self, tmpdir, csv_str, table_name, **kwargs):
        file_name = '{}.csv'.format(table_name)
        csv_file = tmpdir.join(file_name)
        csv_file.write(csv_str)

        utils.load_table(str(csv_file), table_name, **kwargs)
        self.meta.reflect()
Example #3
def apify(filename, tablename=None):
    tablename = tablename or utils.get_name(filename)
    logger.info('Importing {0} to table {1}'.format(filename, tablename))
    utils.drop_table(tablename)
    utils.load_table(filename, tablename)
    utils.index_table(tablename, config.CASE_INSENSITIVE)
    logger.info('Finished importing {0}'.format(filename))
Example #4
def gather_fit_data(fit_count, maxfev):
    fn_format = "fits.fit_count={fit_count}_maxfev={maxfev}.txt"
    try:
        return utils.load_table(fn_format.format(**locals()))
    except OSError:
        pass

    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    results = []
    with multiprocessing.Pool(4) as p:
        results = p.map(
            functools.partial(gather_fit_data_inner,
                              fit_count=fit_count,
                              maxfev=maxfev),
            tuple(
                d.groupby(
                    ["label", "interaction", "num_filled", "freq", "method"])))
    d = pd.DataFrame.from_records(itertools.chain(*results))
    print("{} fits failed, out of {}".format(
        (d["fit_method"] == "fixedab").sum(), len(d)))
    # fit_count=5:
    #  maxfev=default: 198 fits failed, out of 2247
    #  maxfev=10k: 40 fits failed, out of 2248
    #  maxfev=100k: 0 fits failed

    cols = """
    interaction
    label
    freq
    num_filled
    method
    best_chisq
    best_coefficient
    best_coefficient_err
    best_constant
    best_constant_err
    best_fixedab_constant_err
    best_exponent
    best_exponent_err
    best_fit_method
    best_fit_stop
    chisq
    coefficient
    coefficient_err
    constant
    constant_err
    fixedab_constant_err
    exponent
    exponent_err
    fit_method
    fit_stop
    rel_discrep
    rel_discrep_err
    rel_dist
    rel_dist_err
    """.split()
    assert len(d.columns) == len(cols)
    utils.save_table(fn_format.format(**locals()), d[cols])
    return d
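
This example and load_full_fit_data in Example #11 share the same cache-to-disk idiom: try to load a previously saved table, recompute only on OSError, and save the result for next time. The idiom in isolation, with illustrative stand-ins for utils.load_table/utils.save_table:

import pandas as pd

def cached_table(path, compute):
    # return the table at `path`, computing and saving it on a cache miss
    try:
        return pd.read_csv(path)      # stands in for utils.load_table
    except OSError:
        pass
    d = compute()                     # the expensive step runs only once
    d.to_csv(path, index=False)       # stands in for utils.save_table
    return d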
Example #5
    def _compute_denseSeq(self, det_file):
        '''
        Compute all dense sequences given the detections. Only used during initialization (__init__).

        Input:      det_file:           <str>, name and path of the '.csv' file with all detections
                                        (required field names: {'videos','frames','x1','y1','x2','y2'})
        Output:     self.seq_dense:     <list of dictionaries, size: #videos>, one dictionary per video with all
                                        information needed for loading the sequences during the training/testing phase
        '''
        detections = load_table(
            det_file,
            asDict=False)  # the detections are given in an Excel or CSV file

        self.videos = np.unique(detections['videos'].values)
        self.N_vids = len(self.videos)

        self.seq_dense = []  #initialize list of per-video dictionaries
        for v in range(self.N_vids):

            frames, coords, group, orig_size = get_vid_info(
                detections, self.videos[v], self.crops_saved)

            #some detections in between may be missing, so find out where we have a gap
            gap = np.where(frames[1:] - frames[:-1] > 1)[0]
            if len(gap) == 0:  #if all frames are consecutive
                connectSeq = [range(len(frames))]
            else:  #get all connected sequences
                connectSeq = [range(gap[0] + 1)]  #first sequence
                connectSeq.extend([
                    range(gap[i] + 1, gap[i + 1] + 1)
                    for i in range(len(gap) - 1)
                ])  #all sequences in between
                connectSeq.extend([range(gap[-1] + 1,
                                         len(frames))])  #last sequence

            #create the dense sequences
            idx_dense = []
            for i in range(len(connectSeq)):
                seq = connectSeq[i]
                #idx_dense.extend([range(seq[i],seq[i]+self.F) for i in range(len(seq)-self.F+1)])
                idx_dense.extend([
                    range(seq[i], seq[i] + self.skip_frames * self.F,
                          self.skip_frames) for i in range(
                              len(seq) - (self.F * self.skip_frames) +
                              self.skip_frames)
                ])

            #save all information for the dense sequences
            idx_dense = np.array(idx_dense)
            frames_dense = frames[idx_dense]
            coords_dense = coords[idx_dense]
            group_dense = group[idx_dense]
            seq_dense_v = {
                'video': self.videos[v],
                'group': group_dense,
                'frames': frames_dense,
                'coords': coords_dense,
                'orig_size': orig_size
            }
            self.seq_dense.append(seq_dense_v)
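
The gap handling above splits the sorted frame indices into runs of consecutive frames. The same split can be expressed more compactly with np.diff and np.split; a self-contained illustration, independent of the class above:

import numpy as np

frames = np.array([3, 4, 5, 9, 10, 15])
gaps = np.where(np.diff(frames) > 1)[0] + 1    # positions where a run ends
runs = np.split(np.arange(len(frames)), gaps)  # index ranges per run
print([list(frames[r]) for r in runs])         # [[3, 4, 5], [9, 10], [15]]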
Example #6
    def load(self):
        logger.info('###################### load data #######################')
        for pair in self.pairs:
            logger.info('[load] =====%s====' % pair)
            dst_table = pair['dst_table']
            table_file_path = os.path.join(self.data_dir, dst_table + '.bin')
            tmp_table = 'tmp_' + dst_table + '_for_sync'
            # load data
            utils.load_table(
                self.dst_db, self.dst_cursor, tmp_table, table_file_path)
            logger.info(
                '[load] from [file:{file_path}] to [db:{db}-table:{table}]'.format(
                    file_path=table_file_path,
                    db=self.dst_db,
                    table=tmp_table))
            # postload
            custom_table = self.customs.get(dst_table, None)
            if custom_table and getattr(custom_table, 'postload', None):
                custom_table.postload(self.dst_cursor)
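
The postload lookup at the end is an optional-hook pattern: a per-table custom object may or may not define the method, and tables without one are simply skipped. A self-contained sketch of the pattern using sqlite3 (class, table, and column names here are illustrative, not from the project above):

import sqlite3

class UserTableCustom:
    # optional per-table hook; postload runs after the bulk load
    def postload(self, cursor):
        cursor.execute('UPDATE tmp_user_for_sync SET synced = 0')

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE tmp_user_for_sync (id INTEGER, synced INTEGER)')

customs = {'user': UserTableCustom()}
hook = customs.get('user', None)
if hook and getattr(hook, 'postload', None):
    hook.postload(cur)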
Example #7
File: tasks.py Project: 18F/autoapi
def apify(filename, tablename=None):
    try:
        filenames = glob.glob(filename, recursive=True)
    except TypeError:  # recursive glob in Python 3.5+ only
        filenames = glob.glob(filename)
    if len(filenames) > 1 and tablename:
        raise Exception("Can't specify a `tablename` for >1 file")
    for filename in filenames:
        _tablename = tablename or utils.get_name(filename)
        logger.info('Importing {0} to table {1}'.format(filename, _tablename))
        try:
            utils.drop_table(_tablename)
        except sa.exc.OperationalError as e:
            logger.debug('DROP TABLE {} failed, may not exist?'.format(
                _tablename))
            logger.debug(str(e))
        try:
            utils.load_table(filename, _tablename)
        except Exception as e:
            logger.error('Failed to load table from file {}'.format(filename))
            logger.error(str(e))
        logger.info('Finished importing {0}'.format(filename))
Example #8
	def setup(self, workpath, datapath, jss, datapathtype, format_index):
		self.jss = jss
		self.dirname = workpath
		self.datapath = datapath
		self.datapathtype = datapathtype
		self.data_format = self.namespace['data_format'][format_index]
		# setup job center
		self.JobCenter = jobc.JobCenter(self.jss, self.dirname, self.data_format, self)
		# load table
		self.process_data = utils.load_table(os.path.join(self.dirname, self.namespace['project_structure'][0]))
		# load the table changelog, if one exists
		self.rawdata_changelog = utils.load_changelog(os.path.join(self.dirname, self.namespace['project_structure'][0]))
		# set up tag_buffer
		self.tag_buffer = {}
		for assm in self.namespace['process_assignments']:
			self.tag_buffer[assm] = {}
		# write jss to UI
		if self.jss is not None:
			self.ui.comboBox_2.addItem(self.jss)
		# setup all tabWidgets
		# setup process
		for assm in self.namespace['process_assignments']:
			self.ui.comboBox.addItem(assm)
		self.ui.tableWidget.horizontalHeader().setResizeMode(QtGui.QHeaderView.Interactive)
		self.ui.label_73.setText( utils.fmt_process_status(self.data_format) )
		self.ui.pushButton_2.setVisible(False)
		# setup classify
		for decp in self.namespace['classify_decomp']:
			self.ui.comboBox_3.addItem(decp)
		self.ui.lineEdit.setText(os.path.join(os.path.join(self.dirname, \
			self.namespace['project_structure'][0]), self.namespace['process_HF']))
		self.ui.lineEdit_2.setText("Hits/data")
		self.ui.widget_12.setVisible(False)
		self.ui.widget_11.setVisible(False)
		# setup merge
		for sym in self.namespace['merge_sym']:
			self.ui.comboBox_5.addItem(sym)
		# setup phasing
		for method in self.namespace['phasing_method']:
			self.ui.comboBox_9.addItem(method)
			self.ui.comboBox_10.addItem(method)
			self.ui.comboBox_11.addItem(method)
		# setup monitors
		self.table_monitor = None
		# draw table
		self.refresh_table()
Example #9
    def __init__(self, opt):

        self.data_path = opt.Paths['img']
        table = load_table(opt.Paths['detections'], asDict=False)  # np.load(opt.Paths['dic'], encoding="latin1").item()
        self.info = {'videos':table['videos'].values,
                     'frames':table['frames'].values}
        ### Removing missing crops
        # detecions_iterator = tqdm(zip(self.info['videos'],self.info['frames']),desc='Validate detections')
        # selection = [os.path.exists('%s%s/%06d.jpg'%(self.data_path,v,f))
        #              for i, (v, f) in enumerate(detecions_iterator)]
        
        # self.info['videos'] = self.info['videos'][selection]
        # self.info['frames'] = self.info['frames'][selection]
        ### Determine length of our training set
        uni_videos = np.unique(self.info['videos'])
        if opt.Training['size'] is None:
            self.length = len(self.info['frames'])
        else:
            per_video = int(opt.Training['size']/len(uni_videos))
            #self.length = min(opt.Training['size'],len(self.info['frames']))
            selection = [np.where(self.info['videos']==v)[0][:per_video] 
                            for v in uni_videos]#np.random.permutation(len(self.info['frames']))
            selection = np.concatenate(selection)
            self.info['videos'] = self.info['videos'][selection]
            self.info['frames'] = self.info['frames'][selection]
            self.length = len(self.info['frames'])
        
        ### Load fc6 features into the RAM
        #self.fc6 = np.zeros((self.length, opt.Network['feature_size']))

        data_iter = tqdm(uni_videos,position=2)
        data_iter.set_description('Load Posture Representation')
        
        self.fc6 = []
        for i, v in enumerate(data_iter):
            frames = self.info['frames'][self.info['videos']==v]
            features_file = np.load(opt.Paths['fc6'] + v + '.npz')
            selection = [np.where(features_file['frames']==frame)[0][0] 
                         for frame in frames 
                         if frame in features_file['frames']]
            self.fc6.append(features_file['fc6'][selection])
            # self.fc6.append(features_file['fc6'])
        
        self.fc6 = np.concatenate(self.fc6,0)
        assert len(self.fc6.shape)==2, 'Features not properly concatenated'
        assert self.fc6.shape[0]==self.info['videos'].shape[0],'Wrong number of features loaded: %d - %d'%(self.info['videos'].shape[0],self.fc6.shape[0])
Example #10
    def __init__(self, opt):

        self.data_path = opt.Paths['img']
        self.frame_path = opt.Paths['frame_path']
        table = load_table(opt.Paths['detections'], asDict=False)
        self.info = {
            'videos': table['videos'].values,
            'frames': table['frames'].values
        }
        ### Determine length of our training set
        uni_videos = np.unique(self.info['videos'])
        if opt.Training['size'] is None:
            self.length = len(self.info['frames'])
        else:
            per_video = int(opt.Training['size'] / len(uni_videos))
            #self.length = min(opt.Training['size'],len(self.info['frames']))
            selection = [
                np.where(self.info['videos'] == v)[0][:per_video]
                for v in uni_videos
            ]  #np.random.permutation(len(self.info['frames']))
            selection = np.concatenate(selection)
            self.info['videos'] = self.info['videos'][selection]
            self.info['frames'] = self.info['frames'][selection]
            self.length = len(self.info['frames'])

        ### Load fc6 features into the RAM
        data_iter = tqdm(uni_videos, position=2)
        data_iter.set_description('Load Posture Representation')

        self.fc6 = []
        for i, v in enumerate(data_iter):
            frames = self.info['frames'][self.info['videos'] == v]
            features_file = np.load(opt.Paths['fc6'] + v + '.npz')
            selection = [
                np.where(features_file['frames'] == frame)[0][0]
                for frame in frames if frame in features_file['frames']
            ]
            self.fc6.append(features_file['fc6'][selection])
            # self.fc6.append(features_file['fc6'])

        self.fc6 = np.concatenate(self.fc6, 0)
        assert len(self.fc6.shape) == 2, 'Features not properly concatenated'
        assert self.fc6.shape[0] == self.info['videos'].shape[
            0], 'Wrong number of features loaded: %d - %d' % (
                self.info['videos'].shape[0], self.fc6.shape[0])
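
When opt.Training['size'] is set, both versions of this __init__ cap the data set by taking at most per_video detections from each video. The selection idiom in isolation, with toy data:

import numpy as np

videos = np.array(['a', 'a', 'a', 'b', 'b', 'c'])
per_video = 2
selection = np.concatenate([
    np.where(videos == v)[0][:per_video]   # first indices for each video
    for v in np.unique(videos)
])
print(selection)  # [0 1 3 4 5]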
Example #11
def load_full_fit_data(fit_count=DEFAULT_FIT_COUNT, maxfev=DEFAULT_MAXFEV):
    '''Load fit data from file if available.  Otherwise calculate the fits.'''
    fn = "fit_data.fit_count={fit_count}_maxfev={maxfev}.txt".format(
        **locals())
    try:
        return utils.load_table(fn)
    except OSError:
        pass

    sys.stderr.write("Fit data has not yet been calculated.  "
                     "This may take a few minutes...\n")
    sys.stderr.flush()
    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    with multiprocessing.Pool(4) as p:
        results_s, missing_num_shells = zip(*p.map(
            functools.partial(
                gather_fit_data, fit_count=fit_count, maxfev=maxfev),
            tuple(
                d.groupby([
                    "label", "interaction", "num_filled", "freq", "method"
                ]))))
    results = itertools.chain(*results_s)

    missing_fn = ("fits_missing_points."
                  "fit_count={fit_count}_maxfev={maxfev}.log".format(
                      **locals()))
    utils.save_table(missing_fn.format(**locals()),
                     pd.DataFrame.from_records(missing_num_shells))
    sys.stderr.write("Missing data points logged to: {}\n".format(missing_fn))
    sys.stderr.flush()

    d = pd.DataFrame.from_records(results)
    num_failed = (d["fit_method"] == "fixedab").sum()
    if num_failed:
        sys.stderr.write("{} out of {} fits failed\n".format(
            num_failed, len(d)))
        sys.stderr.flush()

    utils.save_table(fn, d)
    return d
Example #12
            D[j, :] = d[:100 * k]

    return D, I


# if __name__ == "__main__":
############################################
# 1. Load Features
############################################
results_fold = '%s/similarity/nearestNeighbor/%s/' % (cfg.results_path,
                                                      args.feature_type)
if not os.path.exists(results_fold): os.makedirs(results_fold)

#videos = load_table(cfg.video_path+'/vid_list.csv',index=None,asDict=True).values()
#if len(videos)>1: videos = videos[1].values()
detections = load_table(cfg.detection_file, asDict=False)
uni_videos = np.unique(detections['videos'].values)

if 'fc6' in args.feature_type or 'fc7' in args.feature_type:
    print(
        'Chosen features: "%s". Compute %i Nearest Neighbors of %i randomly chosen postures. The Results will be saved in "%s".'
        % (args.feature_type, args.nn_per_query, args.number_of_queries,
           results_fold))
elif 'lstm' in args.feature_type:
    print(
        'Chosen features: "%s". Compute %i Nearest Neighbors of %i randomly chosen sequences. The Results will be saved in "%s".'
        % (args.feature_type, args.nn_per_query, args.number_of_queries,
           results_fold))
else:
    raise ValueError(
        'Chosen Features (%s) are not available. Please choose "fc6", "fc7" or "fc6fc7" for posture features or "lstm" for sequence features.'
        % args.feature_type)
Example #13
video_files = []
for root, dirnames, filenames in os.walk(cfg.video_path):
    video_files.extend(glob(root + "/*." + cfg.video_format))

for vid in video_files:
    vid_list = vid_list.append(
        {'videos': vid[len(cfg.video_path):-(len(cfg.video_format) + 1)]},
        ignore_index=True)

vid_list.to_csv(cfg.video_path + '/vid_list.csv')

############################################
# 1. Extract the frames
############################################
frames_crop = load_table(
    cfg.frames_crop,
    index='videos')  #load the cropping coordinates in dictionary format

#find all video files in the given directory
video_files = []
for root, dirnames, filenames in os.walk(cfg.video_path):
    video_files.extend(glob(root + "/*." + cfg.video_format))

N = len(video_files)
print('Number of found videos: %i \n Start extracting frames...' % N)

for v in trange(N):
    vid_fullPath = video_files[v]
    vid = vid_fullPath[len(cfg.video_path):-(len(cfg.video_format) + 1)]

    #define output folder
Example #14
def plot(fit_count=5, log=False, maxfev=0, plot_type="scatter",
         stat="err", hf=False):
    dorig = utils.filter_preferred_ml(utils.load_all())

    d_good = utils.load_table("fits_good.txt")
    d_good = d_good.groupby(
        ["interaction", "label", "freq", "num_filled", "method"]
    ).first()

    d = gather_fit_data(fit_count=fit_count, maxfev=maxfev)

    d = d[d["interaction"] == "normal"]
    # d = d[d["label"] == "add"]
    # d = d[d["method"] == "imsrg"]
    # d = d[d["num_filled"] == 5]
    # d = d[d["freq"] == 0.1]
    d = d[d["fit_method"] != "fixedab"]

    doriggrps = dorig.groupby(["interaction", "label", "freq",
                               "num_filled", "method"])

    d["rel_constant_err"] = d["constant_err"] / d["constant"]
    d["rel_best_constant_err"] = d["best_constant_err"] / d["best_constant"]
    d["label_is_ground"] = d["label"] == "ground"
    d["good"] = d.apply(lambda r: d_good.loc[
        (r["interaction"], r["label"], r["freq"],
         r["num_filled"], r["method"])]["good"], axis=1)
    d["rel_chi"] = d["chisq"]**.5 / d["constant"]
    d["rel_reduced_chi"] = d["rel_chi"] / (fit_count - 3)
    d["rel_best_chisq"] = d["best_chisq"]**.5 / d["best_constant"]
    d["rel_best_reduced_chisq"] = d["rel_best_chisq"] / (fit_count - 3)
    d = d[(d["rel_best_reduced_chisq"] < 1e-6)]
    d["fixedab_with_hf"] = (
        ((d["fit_method"] == "fixedab") ==
         (d["method"].isin(["hf", "hf+qdpt3"]))) |
        (d["fit_method"] == "full")
    )

    color_col = "method"
    bin_transform = TRANSFORM_ID
    bin_transform = TRANSFORM_LOG_ABS

    if color_col in ["exponent", "rel_dist", "rel_constant_err",
                     "rel_best_constant_err", "rel_best_chisq",
                     "rel_chi", "rel_reduced_chi", "chi_ratio"]:
        num_bins = 16
        d = d.replace([np.inf, -np.inf], np.nan).dropna(subset=[color_col])
        binf = bin_transform[0]
        bininvf = bin_transform[1]
        color_bins = pd.cut(binf(abs(d[color_col])), num_bins)
        d["color_bin_start"] = color_bins.map(
            lambda bin: bininvf(parse_bin(bin)[0]))
        d["color_bin_stop"] = color_bins.map(
            lambda bin: bininvf(parse_bin(bin)[1]))
        color_bin_cols = ["color_bin_start", "color_bin_stop"]
    else:
        color_bin_cols = [color_col]
    max_bins = len(d[color_bin_cols[0]].unique())

    fig, ax = plt.subplots()

    def on_pick_event(event):
        x = list(event.artist.get_xdata()[event.ind])[0]
        y = list(event.artist.get_ydata()[event.ind])[0]
        sel = d[(abs(d["x"] - x) <= 1e-20) &
                (abs(d["y"] - y) <= 1e-20)]
        print(sel.transpose().to_csv())
        if len(sel) != 1:
            print('>> not found <<')
            return

        sel = sel.iloc[0]
        grp = doriggrps.get_group((sel["interaction"], sel["label"],
                                   sel["freq"], sel["num_filled"],
                                   sel["method"]))
        fig, ax = plt.subplots(2)
        ax[0].plot(grp["num_shells"], grp["energy"], "x")
        fit_start = sel["fit_stop"] + 1 - fit_count
        ax[0].axvspan(fit_start, sel["fit_stop"], color="#cccccc")
        xs = np.linspace(grp["num_shells"].min(), grp["num_shells"].max())
        ax[0].plot(xs,
                   sel["coefficient"] * xs ** sel["exponent"]
                   + sel["constant"])
        subgrp = grp[grp["num_shells"].between(
            fit_start-0.1, sel["fit_stop"]+0.1)]
        last_constant = sel["constant"]
        last_constant_err = sel["constant_err"]

        def random_weight(count):
            weights = np.zeros(count)
            for i in range(count):
                weights[np.random.randint(0, count)] += 1
            return weights

        p0 = [sel["coefficient"], sel["exponent"], sel["constant"]]
        p = p0
        x = subgrp["num_shells"]
        y = subgrp["energy"]
        constants = []
        constants.append(p[2])

        print(f"x = np.array({list(x)})")
        print(f"y = np.array({list(y)})")

        ax[1].plot(x, (p[0] * x ** p[1] + p[2] - y), "-x")
        ax[1].axhline(0.0, linestyle=":")

        for i in range(10):
            count = len(x)
            weights = random_weight(count) + 1e-99
            if sum(weights > 0.1) <= 3:  # can't fit with so few points
                continue
            try:
                p, cov = scipy.optimize.curve_fit(
                    lambda x, a, b, c: a * x ** b + c,
                    x, y,
                    sigma=1.0 / weights ** 0.5,
                    p0=p0, maxfev=100000)
            except RuntimeError as e:
                print(e)
                continue
            chisq = np.average((p[0] * x ** p[1] + p[2] - y) ** 2,
                               weights=weights) * len(x)
            constant = p[2]
            constant_err = cov[2, 2] ** 0.5
            constants.append(p[2])
            last_constant = constant
            last_constant_err = constant_err
        print("result", np.mean(constants), np.std(constants))
        print("rel", np.std(constants) / np.mean(constants))
        ax[0].set_ylim([max(ax[0].get_ylim()[0], 0.0),
                        min(ax[0].get_ylim()[1], np.max(y))])
        ax[0].plot(xs, p[0] * xs ** p[1] + p[2], ":", color="lime")

    fig.canvas.mpl_connect("pick_event", on_pick_event)

    d["quality"] = np.log10(d["constant_err"]/d["chisq"]**0.5)

    # hf has unique behaviors (what about mp2?)
    if hf:
        d = d[d["method"] == "hf"]
    else:
        d = d[d["method"] != "hf"]
        if stat == "err":
            d = d[d["quality"] > 0]

    d["y"] = d["rel_discrep"] / d["rel_constant_err"]
    if stat == "hessian":
        d["x"] = d["quality"]
    elif stat == "err":
        d["x"] = np.log10(d["rel_constant_err"])
    else:
        assert False

    d["y_err"] = d["rel_discrep_err"] / d["rel_discrep"] * d["y"]

    d = d[(d["rel_constant_err"] > 0) & (d["rel_constant_err"] < np.inf)]
    if plot_type == "contour":

        if hf:
            hf_suffix = "HF"
        else:
            hf_suffix = "non-HF"
        # xrange/yrange = mesh extent; xlim/ylim = view window
        if stat == "err":
            if hf:
                nx = 20
                ny = 20
                xrange = (-7, -1)
                yrange = (-5, 5)
                xlim = (-6, -2)
                ylim = (-4, 4)
                title = ("discrepancy vs fit uncertainty "
                         f"({hf_suffix})")
            else:
                nx = 20
                ny = 40
                xrange = (-7, -1)
                yrange = (-50, 50)
                xlim = (-6, -2)
                ylim = (-40, 40)
                title = ("discrepancy vs fit uncertainty "
                         f"(filtered: Q > 0, {hf_suffix})")
            xlabel = r"$\log_{10}\left(\frac{\sigma_c}{c}\right)$"
            ylabel = r"$\frac{\varepsilon}{\sigma_c}$"
        elif stat == "hessian":
            if hf:
                nx = 20
                ny = 40
                xrange = (-0.6, 1.5)
                yrange = (-10, 10)
                xlim = (-0.4, 1.1)
                ylim = (-10, 10)
            else:
                nx = 20
                ny = 40
                xrange = (-0.6, 2.1)
                yrange = (-200, 200)
                xlim = (-0.4, 1.7)
                ylim = (-150, 150)
            title = ("spread of “actual” discrepancy vs quality "
                     f"({hf_suffix})")
            xlabel = (r"$Q = \log_{10}\left(\frac{\sigma_c}"
                       r"{\sqrt{\mathtt{RSS}}}\right)$")
            ylabel = r"$\frac{\varepsilon}{\sigma_c}$"
        else:
            assert False
        ax.plot(d["x"], d["y"], "o", markersize=1, picker=3,
                color="white", markeredgewidth=0)
        dx = (xrange[1] - xrange[0]) / (nx - 1)
        h, x, y = np.histogram2d(d["x"], d["y"], bins=(nx, ny - 1),
                                 range=((xrange[0] - 0.5 * dx,
                                         xrange[1] + 0.5 * dx),
                                        yrange))
        ch = np.concatenate([np.zeros((nx, 1)),
                             np.cumsum(h, axis=1)],
                            axis=1)
        z = ch / ch[..., -1, np.newaxis]
        x, y = np.meshgrid(0.5 * (x[1:] + x[:-1]), y, indexing="ij")
        levels = np.linspace(-2.0, 2.0, 5)
        levels = 0.5 + 0.5 * scipy.special.erf(2.0 ** -0.5 * levels)
        ax.axhline(-1.0, linestyle="--", color="white", linewidth=0.5)
        ax.axhline(1.0, linestyle="--", color="white", linewidth=0.5)
        ax.contour(x, y, z,
                   levels=levels,
                   colors="white",
                   alpha=0.3)
        cs = ax.contourf(x, y, z,
                         levels=np.linspace(0.0, 1.0, 300),
                         # note: translucent cmaps tend to cause artifacts
                         cmap=CMAP_FOLDED_VIRIDIS,
                         linestyle=":")
        for c in cs.collections: # http://stackoverflow.com/a/32911283
            c.set_edgecolor("face")
        fig.colorbar(cs)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

    elif plot_type == "scatter":

        cmap = utils.CMAP_RAINBOW
        for i, (bin, g) in enumerate(sorted(d.groupby(color_bin_cols))):
            color = cmap(float(i) / max_bins)
            ax.plot(g["x"], g["y"], ".",
                    label=str(bin),
                    color=color,
                    markersize=10,
                    markeredgewidth=0,
                    alpha=0.5,
                    picker=2)
        ax.legend()

    fn = f"fit-predictiveness-{plot_type}-{fit_count}-{stat}-{hf}"
    fig.tight_layout()
    utils.savefig(fig, fn)
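
The random_weight helper above implements a poor-man's bootstrap: each resample is encoded as integer draw counts per point and fed to curve_fit through sigma = 1/sqrt(w), so zero-weight points are effectively excluded. The idea in isolation, using np.bincount in place of the explicit loop and a linear model for brevity:

import numpy as np
import scipy.optimize

rng = np.random.default_rng(0)
x = np.linspace(1.0, 10.0, 20)
y = 2.0 * x + 1.0 + rng.normal(0.0, 0.1, x.size)

slopes = []
for _ in range(100):
    # one bootstrap resample, expressed as per-point draw counts
    counts = np.bincount(rng.integers(0, x.size, x.size), minlength=x.size)
    w = counts + 1e-99                 # avoid division by zero below
    p, _ = scipy.optimize.curve_fit(
        lambda x, a, b: a * x + b, x, y,
        sigma=1.0 / w ** 0.5)          # weight each point by its draw count
    slopes.append(p[0])
print(np.mean(slopes), np.std(slopes))  # bootstrap mean and spread of the slope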
Example #15
#!/usr/bin/env python3
import functools, os, sys
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import utils

utils.init(__file__)

d = utils.load_table("compare_rel_slopes.txt")

# we want to use (interaction, freq, num_particles) as an axis
# but matplotlib doesn't let us do that
# so we have to use this awful hack
x_labels = [
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 6",
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 12",
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 20",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 6",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 12",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 20",
    "'_sigmaA=0.5', 1.0, 6",
    "'_sigmaA=0.5', 1.0, 12",
    "'_sigmaA=0.5', 1.0, 20",
    "'_sigmaA=0.5', 0.28, 6",
    "'_sigmaA=0.5', 0.28, 12",
    "'_sigmaA=0.5', 0.28, 20",
    "'_sigmaB=4.0', 1.0, 6",
    "'_sigmaB=4.0', 1.0, 12",
    "'_sigmaB=4.0', 1.0, 20",
    "'_sigmaB=4.0', 0.28, 6",
Example #16
#!/usr/bin/env python3
import functools, os
import matplotlib.pyplot as plt
import pandas as pd
import utils

utils.init(__file__)

d = utils.load_table("imsrg-qdpt/dat_arenergy_by_ml.txt")
d = d[["num_shells", "num_filled", "freq", "ml", "label", "energy"]]
d["method"] = "qdpt"
dq = d

d = utils.load_table("EOM_IMSRG_qd_attached.dat")
d["energy"] = d["E(N+1)-E(N)"]
d = d[["shells", "filled", "ML", "omega", "energy"]]
d = d.rename(columns={
    "shells": "num_shells",
    "filled": "num_filled",
    "ML": "ml",
    "omega": "freq",
})
d["label"] = "add"
d["method"] = "eom"
dea = d

d = utils.load_table("EOM_IMSRG_qd_removed.dat")
d["energy"] = -d["E(N-1)-E(N)"]
d = d[["shells", "filled", "ML", "omega", "energy"]]
d = d.rename(columns={
    "shells": "num_shells",
Example #17
                                      is_multimaster=args.multimaster)
    elif args.obj_type == 'volume':
        utils.autosetup_replica_table_multithread(
            volume_path=args.path,
            replica_parent=args.replica,
            num_replica=args.numreplica,
            is_multimaster=args.multimaster)
    else:
        logging.error('Unrecognized object. Cannot create.')
        sys.exit(-1)
elif args.cmd_name == 'load':
    logging.debug('Load command')
    if args.obj_type == 'table':
        utils.load_table(table_name=args.path,
                         num_cfs=args.numcfs,
                         num_cols=args.numcols,
                         num_rows=args.numrows,
                         is_json=args.json)
    elif args.obj_type == 'volume':
        utils.load_volume_tables_multithread(volume_path=args.path,
                                             num_cfs=args.numcfs,
                                             num_cols=args.numcols,
                                             num_rows=args.numrows,
                                             is_json=args.json)
    else:
        logging.error('Unrecognized object. Cannot create.')
        sys.exit(-1)
elif args.cmd_name == 'replstatus':
    logging.debug('Replica status tracking')
    if args.obj_type == 'table':
        utils.get_replica_status(table_name=args.path, fields=args.filter)
Example #18
def plot(fit_count=fits.DEFAULT_FIT_COUNT, log=False,
         maxfev=fits.DEFAULT_MAXFEV, plot_type="scatter",
         stat="err", hf=False):
    dorig = utils.filter_preferred_ml(utils.load_all())

    d_good = utils.load_table("fits_good.txt")
    d_good = d_good.groupby(
        ["interaction", "label", "freq", "num_filled", "method"]
    ).first()

    d = fits.load_predictive_data(fit_count=fit_count, maxfev=maxfev)

    d = d[d["interaction"] == "normal"]
    # d = d[d["label"] == "add"]
    # d = d[d["method"] == "imsrg"]
    # d = d[d["num_filled"] == 5]
    # d = d[d["freq"] == 0.1]
    d = d[d["fit_method"] != "fixedab"]

    doriggrps = dorig.groupby(["interaction", "label", "freq",
                               "num_filled", "method"])

    d["rel_constant_err"] = d["constant_err"] / d["constant"]
    d["rel_best_constant_err"] = d["best_constant_err"] / d["best_constant"]
    d["label_is_ground"] = d["label"] == "ground"
    d["good"] = d.apply(functools.partial(is_good, d_good), axis=1)
    d["rel_chi"] = d["chisq"]**.5 / d["constant"]
    d["rel_reduced_chi"] = d["rel_chi"] / (fit_count - 3)
    d["rel_best_chisq"] = d["best_chisq"]**.5 / d["best_constant"]
    d["rel_best_reduced_chisq"] = d["rel_best_chisq"] / (fit_count - 3)
    d = d[(d["rel_best_reduced_chisq"] < 1e-6)]
    d["fixedab_with_hf"] = (
        ((d["fit_method"] == "fixedab") ==
         (d["method"].isin(["hf", "hf+qdpt3"]))) |
        (d["fit_method"] == "full")
    )

    color_col = "method"
    bin_transform = TRANSFORM_ID
    bin_transform = TRANSFORM_LOG_ABS

    if color_col in ["exponent", "rel_dist", "rel_constant_err",
                     "rel_best_constant_err", "rel_best_chisq",
                     "rel_chi", "rel_reduced_chi", "chi_ratio"]:
        num_bins = 16
        d = d.replace([np.inf, -np.inf], np.nan).dropna(subset=[color_col])
        binf = bin_transform[0]
        bininvf = bin_transform[1]
        color_bins = pd.cut(binf(abs(d[color_col])), num_bins)
        d["color_bin_start"] = color_bins.map(
            lambda bin: bininvf(parse_bin(bin)[0]))
        d["color_bin_stop"] = color_bins.map(
            lambda bin: bininvf(parse_bin(bin)[1]))
        color_bin_cols = ["color_bin_start", "color_bin_stop"]
    else:
        color_bin_cols = [color_col]
    max_bins = len(d[color_bin_cols[0]].unique())

    fig, ax = plt.subplots()

    def on_pick_event(event):
        x = list(event.artist.get_xdata()[event.ind])[0]
        y = list(event.artist.get_ydata()[event.ind])[0]
        sel = d[(abs(d["x"] - x) <= 1e-20) &
                (abs(d["y"] - y) <= 1e-20)]
        print(sel.transpose().to_csv())
        if len(sel) != 1:
            print('>> not found <<')
            return

        sel = sel.iloc[0]
        grp = doriggrps.get_group((sel["interaction"], sel["label"],
                                   sel["freq"], sel["num_filled"],
                                   sel["method"]))
        fig, ax = plt.subplots(2)
        ax[0].plot(grp["num_shells"], grp["energy"], "x")
        fit_start = sel["fit_stop"] + 1 - fit_count
        ax[0].axvspan(fit_start, sel["fit_stop"], color="#cccccc")
        xs = np.linspace(grp["num_shells"].min(), grp["num_shells"].max())
        ax[0].plot(xs,
                   sel["coefficient"] * xs ** sel["exponent"]
                   + sel["constant"])
        subgrp = grp[grp["num_shells"].between(
            fit_start-0.1, sel["fit_stop"]+0.1)]
        last_constant = sel["constant"]
        last_constant_err = sel["constant_err"]

        def random_weight(count):
            weights = np.zeros(count)
            for i in range(count):
                weights[np.random.randint(0, count)] += 1
            return weights

        p0 = [sel["coefficient"], sel["exponent"], sel["constant"]]
        p = p0
        x = subgrp["num_shells"]
        y = subgrp["energy"]
        constants = []
        constants.append(p[2])

        print(f"x = np.array({list(x)})")
        print(f"y = np.array({list(y)})")

        ax[1].plot(x, (p[0] * x ** p[1] + p[2] - y), "-x")
        ax[1].axhline(0.0, linestyle=":")

        for i in range(10):
            count = len(x)
            weights = random_weight(count) + 1e-99
            if sum(weights > 0.1) <= 3:  # can't fit with so few points
                continue
            try:
                p, cov = scipy.optimize.curve_fit(
                    lambda x, a, b, c: a * x ** b + c,
                    x, y,
                    sigma=1.0 / weights ** 0.5,
                    p0=p0, maxfev=100000)
            except RuntimeError as e:
                print(e)
                continue
            chisq = np.average((p[0] * x ** p[1] + p[2] - y) ** 2,
                               weights=weights) * len(x)
            constant = p[2]
            constant_err = cov[2, 2] ** 0.5
            constants.append(p[2])
            last_constant = constant
            last_constant_err = constant_err
        print("result", np.mean(constants), np.std(constants))
        print("rel", np.std(constants) / np.mean(constants))
        ax[0].set_ylim([max(ax[0].get_ylim()[0], 0.0),
                        min(ax[0].get_ylim()[1], np.max(y))])
        ax[0].plot(xs, p[0] * xs ** p[1] + p[2], ":", color="lime")

    fig.canvas.mpl_connect("pick_event", on_pick_event)

    d["quality"] = np.log10(d["constant_err"]/d["chisq"]**0.5)

    # hf has unique behaviors (what about mp2?)
    if hf:
        d = d[d["method"] == "hf"]
    else:
        d = d[d["method"] != "hf"]
        if stat == "err":
            d = d[d["quality"] > 0]

    d["y"] = d["rel_discrep"] / d["rel_constant_err"]
    if stat == "hessian":
        d["x"] = d["quality"]
    elif stat == "err":
        d["x"] = np.log10(d["rel_constant_err"])
    else:
        assert False

    d["y_err"] = d["rel_discrep_err"] / d["rel_discrep"] * d["y"]

    d = d[(d["rel_constant_err"] > 0) & (d["rel_constant_err"] < np.inf)]
    if plot_type == "contour":

        if hf:
            hf_suffix = "HF"
        else:
            hf_suffix = "non-HF"
        # xrange/yrange = mesh extent; xlim/ylim = view window
        if stat == "err":
            if hf:
                nx = 20
                ny = 20
                xrange = (-7, -1)
                yrange = (-5, 5)
                xlim = (-6, -2)
                ylim = (-4, 4)
                title = ("discrepancy vs fit uncertainty "
                         f"({hf_suffix})")
            else:
                nx = 20
                ny = 40
                xrange = (-7, -1)
                yrange = (-50, 50)
                xlim = (-6, -2)
                ylim = (-40, 40)
                title = ("discrepancy vs fit uncertainty "
                         f"(filtered: Q > 0, {hf_suffix})")
            xlabel = r"$\log_{10}\left(\frac{\sigma_c}{c}\right)$"
            ylabel = r"$\frac{\varepsilon}{\sigma_c}$"
        elif stat == "hessian":
            if hf:
                nx = 20
                ny = 40
                xrange = (-0.6, 1.5)
                yrange = (-10, 10)
                xlim = (-0.4, 1.1)
                ylim = (-10, 10)
            else:
                nx = 20
                ny = 40
                xrange = (-0.6, 2.1)
                yrange = (-200, 200)
                xlim = (-0.4, 1.7)
                ylim = (-150, 150)
            title = ("spread of “actual” discrepancy vs quality "
                     f"({hf_suffix})")
            xlabel = (r"$Q = \log_{10}\left(\frac{\sigma_c}"
                       r"{\sqrt{\mathtt{RSS}}}\right)$")
            ylabel = r"$\frac{\varepsilon}{\sigma_c}$"
        else:
            assert False
        ax.plot(d["x"], d["y"], "o", markersize=1, picker=3,
                color="white", markeredgewidth=0)
        dx = (xrange[1] - xrange[0]) / (nx - 1)
        h, x, y = np.histogram2d(d["x"], d["y"], bins=(nx, ny - 1),
                                 range=((xrange[0] - 0.5 * dx,
                                         xrange[1] + 0.5 * dx),
                                        yrange))
        ch = np.concatenate([np.zeros((nx, 1)),
                             np.cumsum(h, axis=1)],
                            axis=1)
        z = ch / ch[..., -1, np.newaxis]
        x, y = np.meshgrid(0.5 * (x[1:] + x[:-1]), y, indexing="ij")
        levels = np.linspace(-2.0, 2.0, 5)
        levels = 0.5 + 0.5 * scipy.special.erf(2.0 ** -0.5 * levels)
        ax.axhline(-1.0, linestyle="--", color="white", linewidth=0.5)
        ax.axhline(1.0, linestyle="--", color="white", linewidth=0.5)
        ax.contour(x, y, z,
                   levels=levels,
                   colors="white",
                   alpha=0.3)
        cs = ax.contourf(x, y, z,
                         levels=np.linspace(0.0, 1.0, 300),
                         # note: translucent cmaps tend to cause artifacts
                         cmap=CMAP_FOLDED_VIRIDIS,
                         linestyle=":")
        for c in cs.collections: # http://stackoverflow.com/a/32911283
            c.set_edgecolor("face")
        fig.colorbar(cs)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

    elif plot_type == "scatter":

        cmap = utils.CMAP_RAINBOW
        for i, (bin, g) in enumerate(sorted(d.groupby(color_bin_cols))):
            color = cmap(float(i) / max_bins)
            ax.plot(g["x"], g["y"], ".",
                    label=str(bin),
                    color=color,
                    markersize=10,
                    markeredgewidth=0,
                    alpha=0.5,
                    picker=2)
        ax.legend()

    fn = f"fit-predictiveness-{plot_type}-{fit_count}-{stat}-{hf}"
    fig.tight_layout()
    utils.savefig(fig, fn)
Example #19
#!/usr/bin/env python3
import os, re, sys
sys.path.insert(1, os.path.join(os.path.dirname(__file__), ".."))
import utils

fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt"
d = utils.load_table(fn)
# canonicalization can introduce duplicates, in addition to whatever
# duplicates that already exist in the file
d["p"] = d["p"].map(utils.canonicalize_p)
d = utils.check_fun_dep(d,
                        ["interaction", "num_shells", "num_filled", "freq",
                         "method", "p", "term_id"],
                        {"correction": 1e-7},
                        combiner=utils.rightmost_combiner)
d = d.sort_values(["interaction", "num_shells", "num_filled", "freq",
                   "method", "p", "term_id", "correction"])
with open(fn, "w") as f:
    f.write("""
# term_ids 3 and 4: QDPT2
# term_ids 5 to 22: QDPT3
#
# Functional dependencies:
#
#   * (num_shells, num_filled, freq, method, p, term_id) -> correction
#
"""[1:])
    utils.save_table(f, d)
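
utils.check_fun_dep itself is not shown on this page; from this call site and the one in Example #21 below, it verifies that the key columns determine each value column to within a tolerance and resolves duplicates by keeping the rightmost row. A hedged sketch of semantics consistent with that usage (a guess from the call sites, not the project's implementation):

import pandas as pd

def check_fun_dep(d, keys, tolerances):
    # verify keys -> value within tolerance, then keep the last row per key
    for col, tol in tolerances.items():
        spread = d.groupby(keys)[col].agg(lambda s: s.max() - s.min())
        bad = spread[spread > tol]
        if len(bad):
            raise ValueError('functional dependency violated:\n%s' % bad)
    return d.groupby(keys, as_index=False).last()  # rightmost row wins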
Example #20
# fill Isomer table
# todo: retention time / figure
isomer = swgdrug[['formula']]
isomer.insert(1, 'retention_time', np.nan)  # where do we get this from?
isomer.insert(2, 'figure', np.nan)  # where do we get this from?
isomer.to_sql('Isomer',
              con=config.db_connection,
              index=False,
              if_exists='append')

# fill label table
label = swgdrug[['formula', 'name']]
label.insert(2, 'preference', np.nan)
label.insert(3, 'isomer_id', np.nan)
isomer_table = load_table('Isomer')

# loop over the label table and get the id of the first Isomer row whose formula matches.
# for now we can only match on the formula name, and several rows may share one name;
# each match is removed from isomer_table so it cannot be matched twice.
for index, row in label.iterrows():
    formula, name = row['formula'], row['name']
    try:
        id_match = isomer_table[isomer_table['formula'] ==
                                formula].iloc[0]['id']
    except IndexError:  # .iloc[0] fails when no formula matches
        raise ValueError(
            "Formula name '{}' does not exist in Isomer table".format(formula))
    isomer_table = isomer_table[isomer_table.id != id_match]
    label.at[index, 'isomer_id'] = id_match
Example #21
#!/usr/bin/env python3
import os, re, sys
sys.path.insert(1, os.path.join(os.path.dirname(__file__), ".."))
import utils

fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt"
d = utils.load_table(fn)
d = utils.check_fun_dep(
    d, ["interaction", "num_shells", "num_filled", "freq", "method"],
    {"energy": 2e-5},
    combiner=utils.rightmost_combiner)
d = d.sort_values(
    ["interaction", "num_shells", "num_filled", "freq", "method", "energy"])
with open(fn, "w") as f:
    f.write("""
# Functional dependencies:
#
#   * (interaction, num_shells, num_filled, freq, method) -> energy
#
"""[1:])
    utils.save_table(f, d)
Esempio n. 27
0
#!/usr/bin/env python3
import functools, os, sys
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import utils

utils.init(__file__)

d = utils.load_table("compare_rel_slopes.txt")

# we want to use (interaction, freq, num_particles) as an axis
# but matplotlib doesn't let us do that
# so we have to use this awful hack
x_labels = [
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 6",
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 12",
    "'_sigmaA=0.5_sigmaB=4.0', 1.0, 20",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 6",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 12",
    "'_sigmaA=0.5_sigmaB=4.0', 0.28, 20",
    "'_sigmaA=0.5', 1.0, 6",
    "'_sigmaA=0.5', 1.0, 12",
    "'_sigmaA=0.5', 1.0, 20",
    "'_sigmaA=0.5', 0.28, 6",
    "'_sigmaA=0.5', 0.28, 12",
    "'_sigmaA=0.5', 0.28, 20",
    "'_sigmaB=4.0', 1.0, 6",
    "'_sigmaB=4.0', 1.0, 12",
    "'_sigmaB=4.0', 1.0, 20",
    "'_sigmaB=4.0', 0.28, 6",
Example #22
#!/usr/bin/env python3
import itertools, sys
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import utils

with utils.plot(__file__):
    d = utils.load_table("compare_rel_slopes.txt", na_filter=False)

    # we want to use (num_particles, freq) as an axis
    # but matplotlib doesn't let us do that
    # so we have to use this awful hack
    interactions = [
        ("", r"$(0, \infty)$"),
        ("_sigmaB=4.0", r"$(0, 4)$"),
        ("_sigmaA=0.5", r"$(\frac{1}{2}, \infty)$"),
        ("_sigmaA=0.5_sigmaB=4.0", r"$(\frac{1}{2}, 4)$"),
    ]
    interaction_colors = {
        "": "#a883e4",
        "_sigmaB=4.0": "#951c16",
        "_sigmaA=0.5": "#005e93",
        "_sigmaA=0.5_sigmaB=4.0": "#1e1e1e",
    }
    interaction_markers = {
        "": "o",
        "_sigmaB=4.0": "X",
        "_sigmaA=0.5": "o",
        "_sigmaA=0.5_sigmaB=4.0": "x",
    }
Example #23
def get_model(input_dim, hid_nodes=16):
    #X_train, y_train, X_test, y_test = split_data(df)

    model = kr.models.Sequential()
    #model.add(kr.layers.Dense(hid_nodes, input_dim=2 * (end - start + 1), activation='relu'))
    model.add(kr.layers.Dense(hid_nodes, input_dim=input_dim, activation='relu'))
    
    model.add(kr.layers.Dense(1, activation='linear'))

    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

    return model


# Kumpula
df_n = load_table(neighbor, 2017)[features]
X_n, y_n = add_shifted_features(df_n, features, start=start, end=end, step=step, forecast_step=forecast_step)
#df_n = df_n.drop(features, axis=1)

df_t = load_table(target_area, 2017)[features]
X_t, y_t = add_shifted_features(df_t, features, start=start, end=end, step=step, forecast_step=forecast_step)
y = y_t[target_feat]
#df_t = df_t.drop(features, axis=1)

X = pd.merge(X_t, X_n, on='Date', suffixes=['_'+neighbor, '_'+target_area])

#X_data = df_All
X_train, y_train, X_test, y_test = split_data(X, y, n=n_preds)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
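
add_shifted_features is not defined on this page; from its call sites it appears to build lagged copies of each feature column as predictors, plus a forecast_step-shifted copy of the features as targets. A hypothetical reconstruction under those assumptions:

import pandas as pd

def add_shifted_features(df, features, start, end, step=1, forecast_step=1):
    # lag each feature by start..end steps; shift the targets into the future
    X = pd.concat(
        {'%s_lag%d' % (col, k): df[col].shift(k)
         for col in features
         for k in range(start, end + 1, step)},
        axis=1)
    y = df[features].shift(-forecast_step)
    valid = X.dropna().index.intersection(y.dropna().index)
    return X.loc[valid], y.loc[valid]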